diff --git a/tools/explorer/test/models/llama_attention_no_rot_emb_ttir.mlir b/tools/explorer/test/models/llama_attention_no_rot_emb_ttir.mlir
new file mode 100644
index 000000000..0c1cea1db
--- /dev/null
+++ b/tools/explorer/test/models/llama_attention_no_rot_emb_ttir.mlir
@@ -0,0 +1,131 @@
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>
+#loc = loc("SelfAttention":0:0)
+// TTIR fixture: @forward computes softmax((x*Wq)(x*Wk)^T * %arg2 + mask) * (x*Wv) * Wo
+// on a 1x12x3200 input, with each 3200-wide projection regrouped as 32x100
+// (presumably heads x head_dim). No rotary-embedding ops appear in this module.
+module @SelfAttention attributes {} {
+  func.func @forward(%arg0: tensor<1x12x3200xf32> {ttir.name = "hidden_states_1"} loc("SelfAttention":0:0), %arg1: tensor<1x1x12x12xf32> {ttir.name = "attention_mask"} loc("SelfAttention":0:0), %arg2: tensor<1xf32> {ttir.name = "input_1_multiply_20"} loc("SelfAttention":0:0), %arg3: tensor<3200x3200xf32> {ttir.name = "model.q_proj.weight"} loc("SelfAttention":0:0), %arg4: tensor<3200x3200xf32> {ttir.name = "model.k_proj.weight"} loc("SelfAttention":0:0), %arg5: tensor<3200x3200xf32> {ttir.name = "model.v_proj.weight"} loc("SelfAttention":0:0), %arg6: tensor<3200x3200xf32> {ttir.name = "model.o_proj.weight"} loc("SelfAttention":0:0)) -> (tensor<1x12x3200xf32> {ttir.name = "SelfAttention.output_reshape_38"}) {
+    // Drop the leading unit dim of the input: 1x12x3200 -> 12x3200.
+    %0 = tensor.empty() : tensor<12x3200xf32> loc(#loc30)
+    %1 = "ttir.squeeze"(%arg0, %0) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc30)
+    // Query branch: %1 x q_proj weight, reshaped to 1x12x32x100, then transposed and squeezed to 32x12x100.
+    %2 = tensor.empty() : tensor<12x3200xf32> loc(#loc31)
+    %3 = "ttir.matmul"(%1, %arg3, %2) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc31)
+    %4 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc32)
+    %5 = "ttir.reshape"(%3, %4) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc32)
+    %6 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc33)
+    %7 = "ttir.transpose"(%5, %6) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc33)
+    %8 = tensor.empty() : tensor<32x12x100xf32> loc(#loc34)
+    %9 = "ttir.squeeze"(%7, %8) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc34)
+    // Key branch: %1 x k_proj weight, same regrouping, then a final transpose to 32x100x12.
+    %10 = tensor.empty() : tensor<12x3200xf32> loc(#loc35)
+    %11 = "ttir.matmul"(%1, %arg4, %10) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc35)
+    %12 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc36)
+    %13 = "ttir.reshape"(%11, %12) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc36)
+    %14 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc37)
+    %15 = "ttir.transpose"(%13, %14) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc37)
+    %16 = tensor.empty() : tensor<32x12x100xf32> loc(#loc38)
+    %17 = "ttir.squeeze"(%15, %16) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc38)
+    %18 = tensor.empty() : tensor<32x100x12xf32> loc(#loc39)
+    %19 = "ttir.transpose"(%17, %18) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc39)
+    // Scores: query x transposed key, multiplied by %arg2, plus the attention mask, softmax over the last dim.
+    %20 = tensor.empty() : tensor<32x12x12xf32> loc(#loc40)
+    %21 = "ttir.matmul"(%9, %19, %20) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc40)
+    %22 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc41)
+    %23 = "ttir.unsqueeze"(%21, %22) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc41)
+    %24 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc42)
+    %25 = "ttir.multiply"(%23, %arg2, %24) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc42)
+    %26 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc43)
+    %27 = "ttir.add"(%25, %arg1, %26) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc43)
+    %28 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc44)
+    %29 = "ttir.softmax"(%27, %28) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc44)
+    %30 = tensor.empty() : tensor<32x12x12xf32> loc(#loc45)
+    %31 = "ttir.squeeze"(%29, %30) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc45)
+    // Value branch: %1 x v_proj weight, regrouped to 32x12x100 (transpose_29 and transpose_31 swap the same two dims, so they cancel).
+    %32 = tensor.empty() : tensor<12x3200xf32> loc(#loc46)
+    %33 = "ttir.matmul"(%1, %arg5, %32) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc46)
+    %34 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc47)
+    %35 = "ttir.reshape"(%33, %34) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc47)
+    %36 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc48)
+    %37 = "ttir.transpose"(%35, %36) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc48)
+    %38 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc49)
+    %39 = "ttir.transpose"(%37, %38) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc49)
+    %40 = tensor.empty() : tensor<32x100x12xf32> loc(#loc50)
+    %41 = "ttir.squeeze"(%39, %40) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc50)
+    %42 = tensor.empty() : tensor<32x12x100xf32> loc(#loc51)
+    %43 = "ttir.transpose"(%41, %42) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc51)
+    // Output: softmax result x value, regrouped back to 12x3200, times o_proj weight, then the leading unit dim is restored.
+    %44 = tensor.empty() : tensor<32x12x100xf32> loc(#loc52)
+    %45 = "ttir.matmul"(%31, %43, %44) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc52)
+    %46 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc53)
+    %47 = "ttir.unsqueeze"(%45, %46) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc53)
+    %48 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc54)
+    %49 = "ttir.transpose"(%47, %48) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc54)
+    %50 = tensor.empty() : tensor<12x3200xf32> loc(#loc55)
+    %51 = "ttir.reshape"(%49, %50) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc55)
+    %52 = tensor.empty() : tensor<12x3200xf32> loc(#loc56)
+    %53 = "ttir.matmul"(%51, %arg6, %52) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc56)
+    %54 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc57)
+    %55 = "ttir.unsqueeze"(%53, %54) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc57)
+    return %55 : tensor<1x12x3200xf32> loc(#loc29)
+  } loc(#loc)
+} loc(#loc)
+// Location aliases: #loc1-#loc28 are raw "forward" positions (line field 4294967295 = 0xFFFFFFFF,
+// presumably an "unknown line" sentinel from the generator); #loc30-#loc57 wrap them with op names.
+#loc1 = loc("forward":4294967295:63)
+#loc2 = loc("forward":4294967295:65)
+#loc3 = loc("forward":4294967295:66)
+#loc4 = loc("forward":4294967295:67)
+#loc5 = loc("forward":4294967295:68)
+#loc6 = loc("forward":4294967295:70)
+#loc7 = loc("forward":4294967295:71)
+#loc8 = loc("forward":4294967295:72)
+#loc9 = loc("forward":4294967295:73)
+#loc10 = loc("forward":4294967295:74)
+#loc11 = loc("forward":4294967295:75)
+#loc12 = loc("forward":4294967295:76)
+#loc13 = loc("forward":4294967295:78)
+#loc14 = loc("forward":4294967295:80)
+#loc15 = loc("forward":4294967295:81)
+#loc16 = loc("forward":4294967295:82)
+#loc17 = loc("forward":4294967295:84)
+#loc18 = loc("forward":4294967295:85)
+#loc19 = loc("forward":4294967295:86)
+#loc20 = loc("forward":4294967295:87)
+#loc21 = loc("forward":4294967295:88)
+#loc22 = loc("forward":4294967295:89)
+#loc23 = loc("forward":4294967295:90)
+#loc24 = loc("forward":4294967295:91)
+#loc25 = loc("forward":4294967295:92)
+#loc26 = loc("forward":4294967295:93)
+#loc27 = loc("forward":4294967295:95)
+#loc28 = loc("forward":4294967295:96)
+#loc29 = loc(unknown)
+#loc30 = loc("reshape_6.dc.squeeze.0"(#loc1))
+#loc31 = loc("matmul_8"(#loc2))
+#loc32 = loc("reshape_9"(#loc3))
+#loc33 = loc("transpose_10"(#loc4))
+#loc34 = loc("reshape_11.dc.squeeze.0"(#loc5))
+#loc35 = loc("matmul_13"(#loc6))
+#loc36 = loc("reshape_14"(#loc7))
+#loc37 = loc("transpose_15"(#loc8))
+#loc38 = loc("reshape_16.dc.squeeze.0"(#loc9))
+#loc39 = loc("transpose_17"(#loc10))
+#loc40 = loc("matmul_18"(#loc11))
+#loc41 = loc("reshape_19.dc.unsqueeze.0"(#loc12))
+#loc42 = loc("multiply_20"(#loc13))
+#loc43 = loc("add_21"(#loc14))
+#loc44 = loc("softmax_22"(#loc15))
+#loc45 = loc("reshape_24.dc.squeeze.0"(#loc16))
+#loc46 = loc("matmul_26"(#loc17))
+#loc47 = loc("reshape_27"(#loc18))
+#loc48 = loc("transpose_28"(#loc19))
+#loc49 = loc("transpose_29"(#loc20))
+#loc50 = loc("reshape_30.dc.squeeze.0"(#loc21))
+#loc51 = loc("transpose_31"(#loc22))
+#loc52 = loc("matmul_32"(#loc23))
+#loc53 = loc("reshape_33.dc.unsqueeze.0"(#loc24))
+#loc54 = loc("transpose_34"(#loc25))
+#loc55 = loc("reshape_35"(#loc26))
+#loc56 = loc("matmul_37"(#loc27))
+#loc57 = loc("reshape_38.dc.unsqueeze.0"(#loc28))
diff --git a/tools/explorer/test/models/open_llama_3b_single_layer.mlir b/tools/explorer/test/models/open_llama_3b_single_layer.mlir
index 5e17dc39e..677aeb3c7 100644
--- a/tools/explorer/test/models/open_llama_3b_single_layer.mlir
+++ b/tools/explorer/test/models/open_llama_3b_single_layer.mlir
@@ -1,7 +1,6 @@
 #any_device = #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>
 #loc = loc("LlamaForCausalLM":0:0)
-#system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,  1x0,  1x1,  1x2,  1x3,  1x4,  1x5,  1x6,  1x7,  2x0,  2x1,  2x2,  2x3,  2x4,  2x5,  2x6,  2x7,  3x0,  3x1,  3x2,  3x3,  3x4,  3x5,  3x6,  3x7,  4x0,  4x1,  4x2,  4x3,  4x4,  4x5,  4x6,  4x7,  5x0,  5x1,  5x2,  5x3,  5x4,  5x5,  5x6,  5x7,  6x0,  6x1,  6x2,  6x3,  6x4,  6x5,  6x6,  6x7,  7x0,  7x1,  7x2,  7x3,  7x4,  7x5,  7x6,  7x7] dram = [ 8x0,  9x0,  10x0,  8x1,  9x1,  10x1,  8x2,  9x2,  10x2,  8x3,  9x3,  10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>], supported_tile_sizes = [ 4x16,  16x16,  32x16,  4x32,  16x32,  32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>
-module @LlamaForCausalLM attributes {tt.system_desc = #system_desc} {
+module @LlamaForCausalLM attributes {} {
   func.func @forward(%arg0: tensor<1x12xi32> {ttir.name = "input_1"} loc("LlamaForCausalLM":0:0), %arg1: tensor<1xf32> {ttir.name = "input_1_add_4"} loc("LlamaForCausalLM":0:0), %arg2: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_14"} loc("LlamaForCausalLM":0:0), %arg3: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_24.1"} loc("LlamaForCausalLM":0:0), %arg4: tensor<1xf32> {ttir.name = "input_1_multiply_25"} loc("LlamaForCausalLM":0:0), %arg5: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_26.1"} loc("LlamaForCausalLM":0:0), %arg6: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_38.1"} loc("LlamaForCausalLM":0:0), %arg7: tensor<1xf32> {ttir.name = "input_1_multiply_39"} loc("LlamaForCausalLM":0:0), %arg8: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_40.1"} loc("LlamaForCausalLM":0:0), %arg9: tensor<1xf32> {ttir.name = "input_1_multiply_48"} loc("LlamaForCausalLM":0:0), %arg10: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_49"} loc("LlamaForCausalLM":0:0), %arg11: tensor<1xf32> {ttir.name = "input_1_add_70"} loc("LlamaForCausalLM":0:0), %arg12: tensor<1xf32> {ttir.name = "input_1_add_90"} loc("LlamaForCausalLM":0:0), %arg13: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_100"} loc("LlamaForCausalLM":0:0), %arg14: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_110.1"} loc("LlamaForCausalLM":0:0), %arg15: tensor<1xf32> {ttir.name = "input_1_multiply_111"} loc("LlamaForCausalLM":0:0), %arg16: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_112.1"} loc("LlamaForCausalLM":0:0), %arg17: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_124.1"} loc("LlamaForCausalLM":0:0), %arg18: tensor<1xf32> {ttir.name = "input_1_multiply_125"} loc("LlamaForCausalLM":0:0), %arg19: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_126.1"} loc("LlamaForCausalLM":0:0), %arg20: tensor<1xf32> {ttir.name = "input_1_multiply_134"} loc("LlamaForCausalLM":0:0), %arg21: 
tensor<1x1x12x12xf32> {ttir.name = "input_1_add_135"} loc("LlamaForCausalLM":0:0), %arg22: tensor<1xf32> {ttir.name = "input_1_add_156"} loc("LlamaForCausalLM":0:0), %arg23: tensor<1xf32> {ttir.name = "input_1_add_176"} loc("LlamaForCausalLM":0:0), %arg24: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_186"} loc("LlamaForCausalLM":0:0), %arg25: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_196.1"} loc("LlamaForCausalLM":0:0), %arg26: tensor<1xf32> {ttir.name = "input_1_multiply_197"} loc("LlamaForCausalLM":0:0), %arg27: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_198.1"} loc("LlamaForCausalLM":0:0), %arg28: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_210.1"} loc("LlamaForCausalLM":0:0), %arg29: tensor<1xf32> {ttir.name = "input_1_multiply_211"} loc("LlamaForCausalLM":0:0), %arg30: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_212.1"} loc("LlamaForCausalLM":0:0), %arg31: tensor<1xf32> {ttir.name = "input_1_multiply_220"} loc("LlamaForCausalLM":0:0), %arg32: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_221"} loc("LlamaForCausalLM":0:0), %arg33: tensor<1xf32> {ttir.name = "input_1_add_242"} loc("LlamaForCausalLM":0:0), %arg34: tensor<1xf32> {ttir.name = "input_1_add_262"} loc("LlamaForCausalLM":0:0), %arg35: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_272"} loc("LlamaForCausalLM":0:0), %arg36: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_282.1"} loc("LlamaForCausalLM":0:0), %arg37: tensor<1xf32> {ttir.name = "input_1_multiply_283"} loc("LlamaForCausalLM":0:0), %arg38: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_284.1"} loc("LlamaForCausalLM":0:0), %arg39: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_296.1"} loc("LlamaForCausalLM":0:0), %arg40: tensor<1xf32> {ttir.name = "input_1_multiply_297"} loc("LlamaForCausalLM":0:0), %arg41: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_298.1"} loc("LlamaForCausalLM":0:0), %arg42: tensor<1xf32> 
{ttir.name = "input_1_multiply_306"} loc("LlamaForCausalLM":0:0), %arg43: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_307"} loc("LlamaForCausalLM":0:0), %arg44: tensor<1xf32> {ttir.name = "input_1_add_328"} loc("LlamaForCausalLM":0:0), %arg45: tensor<1xf32> {ttir.name = "input_1_add_348"} loc("LlamaForCausalLM":0:0), %arg46: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_358"} loc("LlamaForCausalLM":0:0), %arg47: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_368.1"} loc("LlamaForCausalLM":0:0), %arg48: tensor<1xf32> {ttir.name = "input_1_multiply_369"} loc("LlamaForCausalLM":0:0), %arg49: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_370.1"} loc("LlamaForCausalLM":0:0), %arg50: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_382.1"} loc("LlamaForCausalLM":0:0), %arg51: tensor<1xf32> {ttir.name = "input_1_multiply_383"} loc("LlamaForCausalLM":0:0), %arg52: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_384.1"} loc("LlamaForCausalLM":0:0), %arg53: tensor<1xf32> {ttir.name = "input_1_multiply_392"} loc("LlamaForCausalLM":0:0), %arg54: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_393"} loc("LlamaForCausalLM":0:0), %arg55: tensor<1xf32> {ttir.name = "input_1_add_414"} loc("LlamaForCausalLM":0:0), %arg56: tensor<1xf32> {ttir.name = "input_1_add_434"} loc("LlamaForCausalLM":0:0), %arg57: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_444"} loc("LlamaForCausalLM":0:0), %arg58: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_454.1"} loc("LlamaForCausalLM":0:0), %arg59: tensor<1xf32> {ttir.name = "input_1_multiply_455"} loc("LlamaForCausalLM":0:0), %arg60: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_456.1"} loc("LlamaForCausalLM":0:0), %arg61: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_468.1"} loc("LlamaForCausalLM":0:0), %arg62: tensor<1xf32> {ttir.name = "input_1_multiply_469"} loc("LlamaForCausalLM":0:0), %arg63: tensor<1x32x50x100xf32> {ttir.name = 
"dc.input_tensor.index_470.1"} loc("LlamaForCausalLM":0:0), %arg64: tensor<1xf32> {ttir.name = "input_1_multiply_478"} loc("LlamaForCausalLM":0:0), %arg65: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_479"} loc("LlamaForCausalLM":0:0), %arg66: tensor<1xf32> {ttir.name = "input_1_add_500"} loc("LlamaForCausalLM":0:0), %arg67: tensor<1xf32> {ttir.name = "input_1_add_520"} loc("LlamaForCausalLM":0:0), %arg68: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_530"} loc("LlamaForCausalLM":0:0), %arg69: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_540.1"} loc("LlamaForCausalLM":0:0), %arg70: tensor<1xf32> {ttir.name = "input_1_multiply_541"} loc("LlamaForCausalLM":0:0), %arg71: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_542.1"} loc("LlamaForCausalLM":0:0), %arg72: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_554.1"} loc("LlamaForCausalLM":0:0), %arg73: tensor<1xf32> {ttir.name = "input_1_multiply_555"} loc("LlamaForCausalLM":0:0), %arg74: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_556.1"} loc("LlamaForCausalLM":0:0), %arg75: tensor<1xf32> {ttir.name = "input_1_multiply_564"} loc("LlamaForCausalLM":0:0), %arg76: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_565"} loc("LlamaForCausalLM":0:0), %arg77: tensor<1xf32> {ttir.name = "input_1_add_586"} loc("LlamaForCausalLM":0:0), %arg78: tensor<1xf32> {ttir.name = "input_1_add_606"} loc("LlamaForCausalLM":0:0), %arg79: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_616"} loc("LlamaForCausalLM":0:0), %arg80: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_626.1"} loc("LlamaForCausalLM":0:0), %arg81: tensor<1xf32> {ttir.name = "input_1_multiply_627"} loc("LlamaForCausalLM":0:0), %arg82: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_628.1"} loc("LlamaForCausalLM":0:0), %arg83: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_640.1"} loc("LlamaForCausalLM":0:0), %arg84: tensor<1xf32> {ttir.name = "input_1_multiply_641"} 
loc("LlamaForCausalLM":0:0), %arg85: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_642.1"} loc("LlamaForCausalLM":0:0), %arg86: tensor<1xf32> {ttir.name = "input_1_multiply_650"} loc("LlamaForCausalLM":0:0), %arg87: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_651"} loc("LlamaForCausalLM":0:0), %arg88: tensor<1xf32> {ttir.name = "input_1_add_672"} loc("LlamaForCausalLM":0:0), %arg89: tensor<1xf32> {ttir.name = "input_1_add_692"} loc("LlamaForCausalLM":0:0), %arg90: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_702"} loc("LlamaForCausalLM":0:0), %arg91: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_712.1"} loc("LlamaForCausalLM":0:0), %arg92: tensor<1xf32> {ttir.name = "input_1_multiply_713"} loc("LlamaForCausalLM":0:0), %arg93: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_714.1"} loc("LlamaForCausalLM":0:0), %arg94: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_726.1"} loc("LlamaForCausalLM":0:0), %arg95: tensor<1xf32> {ttir.name = "input_1_multiply_727"} loc("LlamaForCausalLM":0:0), %arg96: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_728.1"} loc("LlamaForCausalLM":0:0), %arg97: tensor<1xf32> {ttir.name = "input_1_multiply_736"} loc("LlamaForCausalLM":0:0), %arg98: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_737"} loc("LlamaForCausalLM":0:0), %arg99: tensor<1xf32> {ttir.name = "input_1_add_758"} loc("LlamaForCausalLM":0:0), %arg100: tensor<1xf32> {ttir.name = "input_1_add_778"} loc("LlamaForCausalLM":0:0), %arg101: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_788"} loc("LlamaForCausalLM":0:0), %arg102: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_798.1"} loc("LlamaForCausalLM":0:0), %arg103: tensor<1xf32> {ttir.name = "input_1_multiply_799"} loc("LlamaForCausalLM":0:0), %arg104: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_800.1"} loc("LlamaForCausalLM":0:0), %arg105: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_812.1"} 
loc("LlamaForCausalLM":0:0), %arg106: tensor<1xf32> {ttir.name = "input_1_multiply_813"} loc("LlamaForCausalLM":0:0), %arg107: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_814.1"} loc("LlamaForCausalLM":0:0), %arg108: tensor<1xf32> {ttir.name = "input_1_multiply_822"} loc("LlamaForCausalLM":0:0), %arg109: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_823"} loc("LlamaForCausalLM":0:0), %arg110: tensor<1xf32> {ttir.name = "input_1_add_844"} loc("LlamaForCausalLM":0:0), %arg111: tensor<1xf32> {ttir.name = "input_1_add_864"} loc("LlamaForCausalLM":0:0), %arg112: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_874"} loc("LlamaForCausalLM":0:0), %arg113: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_884.1"} loc("LlamaForCausalLM":0:0), %arg114: tensor<1xf32> {ttir.name = "input_1_multiply_885"} loc("LlamaForCausalLM":0:0), %arg115: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_886.1"} loc("LlamaForCausalLM":0:0), %arg116: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_898.1"} loc("LlamaForCausalLM":0:0), %arg117: tensor<1xf32> {ttir.name = "input_1_multiply_899"} loc("LlamaForCausalLM":0:0), %arg118: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_900.1"} loc("LlamaForCausalLM":0:0), %arg119: tensor<1xf32> {ttir.name = "input_1_multiply_908"} loc("LlamaForCausalLM":0:0), %arg120: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_909"} loc("LlamaForCausalLM":0:0), %arg121: tensor<1xf32> {ttir.name = "input_1_add_930"} loc("LlamaForCausalLM":0:0), %arg122: tensor<1xf32> {ttir.name = "input_1_add_950"} loc("LlamaForCausalLM":0:0), %arg123: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_960"} loc("LlamaForCausalLM":0:0), %arg124: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_970.1"} loc("LlamaForCausalLM":0:0), %arg125: tensor<1xf32> {ttir.name = "input_1_multiply_971"} loc("LlamaForCausalLM":0:0), %arg126: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_972.1"} 
loc("LlamaForCausalLM":0:0), %arg127: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_984.1"} loc("LlamaForCausalLM":0:0), %arg128: tensor<1xf32> {ttir.name = "input_1_multiply_985"} loc("LlamaForCausalLM":0:0), %arg129: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_986.1"} loc("LlamaForCausalLM":0:0), %arg130: tensor<1xf32> {ttir.name = "input_1_multiply_994"} loc("LlamaForCausalLM":0:0), %arg131: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_995"} loc("LlamaForCausalLM":0:0), %arg132: tensor<1xf32> {ttir.name = "input_1_add_1016"} loc("LlamaForCausalLM":0:0), %arg133: tensor<1xf32> {ttir.name = "input_1_add_1036"} loc("LlamaForCausalLM":0:0), %arg134: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1046"} loc("LlamaForCausalLM":0:0), %arg135: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1056.1"} loc("LlamaForCausalLM":0:0), %arg136: tensor<1xf32> {ttir.name = "input_1_multiply_1057"} loc("LlamaForCausalLM":0:0), %arg137: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1058.1"} loc("LlamaForCausalLM":0:0), %arg138: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1070.1"} loc("LlamaForCausalLM":0:0), %arg139: tensor<1xf32> {ttir.name = "input_1_multiply_1071"} loc("LlamaForCausalLM":0:0), %arg140: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1072.1"} loc("LlamaForCausalLM":0:0), %arg141: tensor<1xf32> {ttir.name = "input_1_multiply_1080"} loc("LlamaForCausalLM":0:0), %arg142: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1081"} loc("LlamaForCausalLM":0:0), %arg143: tensor<1xf32> {ttir.name = "input_1_add_1102"} loc("LlamaForCausalLM":0:0), %arg144: tensor<1xf32> {ttir.name = "input_1_add_1122"} loc("LlamaForCausalLM":0:0), %arg145: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1132"} loc("LlamaForCausalLM":0:0), %arg146: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1142.1"} loc("LlamaForCausalLM":0:0), %arg147: tensor<1xf32> {ttir.name = 
"input_1_multiply_1143"} loc("LlamaForCausalLM":0:0), %arg148: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1144.1"} loc("LlamaForCausalLM":0:0), %arg149: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1156.1"} loc("LlamaForCausalLM":0:0), %arg150: tensor<1xf32> {ttir.name = "input_1_multiply_1157"} loc("LlamaForCausalLM":0:0), %arg151: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1158.1"} loc("LlamaForCausalLM":0:0), %arg152: tensor<1xf32> {ttir.name = "input_1_multiply_1166"} loc("LlamaForCausalLM":0:0), %arg153: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1167"} loc("LlamaForCausalLM":0:0), %arg154: tensor<1xf32> {ttir.name = "input_1_add_1188"} loc("LlamaForCausalLM":0:0), %arg155: tensor<1xf32> {ttir.name = "input_1_add_1208"} loc("LlamaForCausalLM":0:0), %arg156: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1218"} loc("LlamaForCausalLM":0:0), %arg157: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1228.1"} loc("LlamaForCausalLM":0:0), %arg158: tensor<1xf32> {ttir.name = "input_1_multiply_1229"} loc("LlamaForCausalLM":0:0), %arg159: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1230.1"} loc("LlamaForCausalLM":0:0), %arg160: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1242.1"} loc("LlamaForCausalLM":0:0), %arg161: tensor<1xf32> {ttir.name = "input_1_multiply_1243"} loc("LlamaForCausalLM":0:0), %arg162: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1244.1"} loc("LlamaForCausalLM":0:0), %arg163: tensor<1xf32> {ttir.name = "input_1_multiply_1252"} loc("LlamaForCausalLM":0:0), %arg164: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1253"} loc("LlamaForCausalLM":0:0), %arg165: tensor<1xf32> {ttir.name = "input_1_add_1274"} loc("LlamaForCausalLM":0:0), %arg166: tensor<1xf32> {ttir.name = "input_1_add_1294"} loc("LlamaForCausalLM":0:0), %arg167: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1304"} loc("LlamaForCausalLM":0:0), %arg168: 
tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1314.1"} loc("LlamaForCausalLM":0:0), %arg169: tensor<1xf32> {ttir.name = "input_1_multiply_1315"} loc("LlamaForCausalLM":0:0), %arg170: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1316.1"} loc("LlamaForCausalLM":0:0), %arg171: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1328.1"} loc("LlamaForCausalLM":0:0), %arg172: tensor<1xf32> {ttir.name = "input_1_multiply_1329"} loc("LlamaForCausalLM":0:0), %arg173: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1330.1"} loc("LlamaForCausalLM":0:0), %arg174: tensor<1xf32> {ttir.name = "input_1_multiply_1338"} loc("LlamaForCausalLM":0:0), %arg175: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1339"} loc("LlamaForCausalLM":0:0), %arg176: tensor<1xf32> {ttir.name = "input_1_add_1360"} loc("LlamaForCausalLM":0:0), %arg177: tensor<1xf32> {ttir.name = "input_1_add_1380"} loc("LlamaForCausalLM":0:0), %arg178: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1390"} loc("LlamaForCausalLM":0:0), %arg179: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1400.1"} loc("LlamaForCausalLM":0:0), %arg180: tensor<1xf32> {ttir.name = "input_1_multiply_1401"} loc("LlamaForCausalLM":0:0), %arg181: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1402.1"} loc("LlamaForCausalLM":0:0), %arg182: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1414.1"} loc("LlamaForCausalLM":0:0), %arg183: tensor<1xf32> {ttir.name = "input_1_multiply_1415"} loc("LlamaForCausalLM":0:0), %arg184: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1416.1"} loc("LlamaForCausalLM":0:0), %arg185: tensor<1xf32> {ttir.name = "input_1_multiply_1424"} loc("LlamaForCausalLM":0:0), %arg186: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1425"} loc("LlamaForCausalLM":0:0), %arg187: tensor<1xf32> {ttir.name = "input_1_add_1446"} loc("LlamaForCausalLM":0:0), %arg188: tensor<1xf32> {ttir.name = "input_1_add_1466"} 
loc("LlamaForCausalLM":0:0), %arg189: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1476"} loc("LlamaForCausalLM":0:0), %arg190: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1486.1"} loc("LlamaForCausalLM":0:0), %arg191: tensor<1xf32> {ttir.name = "input_1_multiply_1487"} loc("LlamaForCausalLM":0:0), %arg192: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1488.1"} loc("LlamaForCausalLM":0:0), %arg193: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1500.1"} loc("LlamaForCausalLM":0:0), %arg194: tensor<1xf32> {ttir.name = "input_1_multiply_1501"} loc("LlamaForCausalLM":0:0), %arg195: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1502.1"} loc("LlamaForCausalLM":0:0), %arg196: tensor<1xf32> {ttir.name = "input_1_multiply_1510"} loc("LlamaForCausalLM":0:0), %arg197: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1511"} loc("LlamaForCausalLM":0:0), %arg198: tensor<1xf32> {ttir.name = "input_1_add_1532"} loc("LlamaForCausalLM":0:0), %arg199: tensor<1xf32> {ttir.name = "input_1_add_1552"} loc("LlamaForCausalLM":0:0), %arg200: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1562"} loc("LlamaForCausalLM":0:0), %arg201: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1572.1"} loc("LlamaForCausalLM":0:0), %arg202: tensor<1xf32> {ttir.name = "input_1_multiply_1573"} loc("LlamaForCausalLM":0:0), %arg203: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1574.1"} loc("LlamaForCausalLM":0:0), %arg204: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1586.1"} loc("LlamaForCausalLM":0:0), %arg205: tensor<1xf32> {ttir.name = "input_1_multiply_1587"} loc("LlamaForCausalLM":0:0), %arg206: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1588.1"} loc("LlamaForCausalLM":0:0), %arg207: tensor<1xf32> {ttir.name = "input_1_multiply_1596"} loc("LlamaForCausalLM":0:0), %arg208: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1597"} loc("LlamaForCausalLM":0:0), %arg209: tensor<1xf32> 
{ttir.name = "input_1_add_1618"} loc("LlamaForCausalLM":0:0), %arg210: tensor<1xf32> {ttir.name = "input_1_add_1638"} loc("LlamaForCausalLM":0:0), %arg211: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1648"} loc("LlamaForCausalLM":0:0), %arg212: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1658.1"} loc("LlamaForCausalLM":0:0), %arg213: tensor<1xf32> {ttir.name = "input_1_multiply_1659"} loc("LlamaForCausalLM":0:0), %arg214: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1660.1"} loc("LlamaForCausalLM":0:0), %arg215: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1672.1"} loc("LlamaForCausalLM":0:0), %arg216: tensor<1xf32> {ttir.name = "input_1_multiply_1673"} loc("LlamaForCausalLM":0:0), %arg217: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1674.1"} loc("LlamaForCausalLM":0:0), %arg218: tensor<1xf32> {ttir.name = "input_1_multiply_1682"} loc("LlamaForCausalLM":0:0), %arg219: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1683"} loc("LlamaForCausalLM":0:0), %arg220: tensor<1xf32> {ttir.name = "input_1_add_1704"} loc("LlamaForCausalLM":0:0), %arg221: tensor<1xf32> {ttir.name = "input_1_add_1724"} loc("LlamaForCausalLM":0:0), %arg222: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1734"} loc("LlamaForCausalLM":0:0), %arg223: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1744.1"} loc("LlamaForCausalLM":0:0), %arg224: tensor<1xf32> {ttir.name = "input_1_multiply_1745"} loc("LlamaForCausalLM":0:0), %arg225: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1746.1"} loc("LlamaForCausalLM":0:0), %arg226: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1758.1"} loc("LlamaForCausalLM":0:0), %arg227: tensor<1xf32> {ttir.name = "input_1_multiply_1759"} loc("LlamaForCausalLM":0:0), %arg228: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1760.1"} loc("LlamaForCausalLM":0:0), %arg229: tensor<1xf32> {ttir.name = "input_1_multiply_1768"} loc("LlamaForCausalLM":0:0), 
%arg230: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1769"} loc("LlamaForCausalLM":0:0), %arg231: tensor<1xf32> {ttir.name = "input_1_add_1790"} loc("LlamaForCausalLM":0:0), %arg232: tensor<1xf32> {ttir.name = "input_1_add_1810"} loc("LlamaForCausalLM":0:0), %arg233: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1820"} loc("LlamaForCausalLM":0:0), %arg234: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1830.1"} loc("LlamaForCausalLM":0:0), %arg235: tensor<1xf32> {ttir.name = "input_1_multiply_1831"} loc("LlamaForCausalLM":0:0), %arg236: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1832.1"} loc("LlamaForCausalLM":0:0), %arg237: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1844.1"} loc("LlamaForCausalLM":0:0), %arg238: tensor<1xf32> {ttir.name = "input_1_multiply_1845"} loc("LlamaForCausalLM":0:0), %arg239: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1846.1"} loc("LlamaForCausalLM":0:0), %arg240: tensor<1xf32> {ttir.name = "input_1_multiply_1854"} loc("LlamaForCausalLM":0:0), %arg241: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1855"} loc("LlamaForCausalLM":0:0), %arg242: tensor<1xf32> {ttir.name = "input_1_add_1876"} loc("LlamaForCausalLM":0:0), %arg243: tensor<1xf32> {ttir.name = "input_1_add_1896"} loc("LlamaForCausalLM":0:0), %arg244: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1906"} loc("LlamaForCausalLM":0:0), %arg245: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1916.1"} loc("LlamaForCausalLM":0:0), %arg246: tensor<1xf32> {ttir.name = "input_1_multiply_1917"} loc("LlamaForCausalLM":0:0), %arg247: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1918.1"} loc("LlamaForCausalLM":0:0), %arg248: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1930.1"} loc("LlamaForCausalLM":0:0), %arg249: tensor<1xf32> {ttir.name = "input_1_multiply_1931"} loc("LlamaForCausalLM":0:0), %arg250: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1932.1"} 
loc("LlamaForCausalLM":0:0), %arg251: tensor<1xf32> {ttir.name = "input_1_multiply_1940"} loc("LlamaForCausalLM":0:0), %arg252: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1941"} loc("LlamaForCausalLM":0:0), %arg253: tensor<1xf32> {ttir.name = "input_1_add_1962"} loc("LlamaForCausalLM":0:0), %arg254: tensor<1xf32> {ttir.name = "input_1_add_1982"} loc("LlamaForCausalLM":0:0), %arg255: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1992"} loc("LlamaForCausalLM":0:0), %arg256: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2002.1"} loc("LlamaForCausalLM":0:0), %arg257: tensor<1xf32> {ttir.name = "input_1_multiply_2003"} loc("LlamaForCausalLM":0:0), %arg258: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2004.1"} loc("LlamaForCausalLM":0:0), %arg259: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2016.1"} loc("LlamaForCausalLM":0:0), %arg260: tensor<1xf32> {ttir.name = "input_1_multiply_2017"} loc("LlamaForCausalLM":0:0), %arg261: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2018.1"} loc("LlamaForCausalLM":0:0), %arg262: tensor<1xf32> {ttir.name = "input_1_multiply_2026"} loc("LlamaForCausalLM":0:0), %arg263: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2027"} loc("LlamaForCausalLM":0:0), %arg264: tensor<1xf32> {ttir.name = "input_1_add_2048"} loc("LlamaForCausalLM":0:0), %arg265: tensor<1xf32> {ttir.name = "input_1_add_2068"} loc("LlamaForCausalLM":0:0), %arg266: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_2078"} loc("LlamaForCausalLM":0:0), %arg267: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2088.1"} loc("LlamaForCausalLM":0:0), %arg268: tensor<1xf32> {ttir.name = "input_1_multiply_2089"} loc("LlamaForCausalLM":0:0), %arg269: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2090.1"} loc("LlamaForCausalLM":0:0), %arg270: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2102.1"} loc("LlamaForCausalLM":0:0), %arg271: tensor<1xf32> {ttir.name = 
"input_1_multiply_2103"} loc("LlamaForCausalLM":0:0), %arg272: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2104.1"} loc("LlamaForCausalLM":0:0), %arg273: tensor<1xf32> {ttir.name = "input_1_multiply_2112"} loc("LlamaForCausalLM":0:0), %arg274: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2113"} loc("LlamaForCausalLM":0:0), %arg275: tensor<1xf32> {ttir.name = "input_1_add_2134"} loc("LlamaForCausalLM":0:0), %arg276: tensor<1xf32> {ttir.name = "input_1_add_2154"} loc("LlamaForCausalLM":0:0), %arg277: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_2164"} loc("LlamaForCausalLM":0:0), %arg278: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2174.1"} loc("LlamaForCausalLM":0:0), %arg279: tensor<1xf32> {ttir.name = "input_1_multiply_2175"} loc("LlamaForCausalLM":0:0), %arg280: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2176.1"} loc("LlamaForCausalLM":0:0), %arg281: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2188.1"} loc("LlamaForCausalLM":0:0), %arg282: tensor<1xf32> {ttir.name = "input_1_multiply_2189"} loc("LlamaForCausalLM":0:0), %arg283: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2190.1"} loc("LlamaForCausalLM":0:0), %arg284: tensor<1xf32> {ttir.name = "input_1_multiply_2198"} loc("LlamaForCausalLM":0:0), %arg285: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2199"} loc("LlamaForCausalLM":0:0), %arg286: tensor<1xf32> {ttir.name = "input_1_add_2220"} loc("LlamaForCausalLM":0:0), %arg287: tensor<1xf32> {ttir.name = "input_1_add_2240"} loc("LlamaForCausalLM":0:0), %arg288: tensor<3200xf32> {ttir.name = "model.norm.weight"} loc("LlamaForCausalLM":0:0), %arg289: tensor<32000x3200xf32> {ttir.name = "model.embed_tokens.weight"} loc("LlamaForCausalLM":0:0), %arg290: tensor<3200xf32> {ttir.name = "model.layers.0.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg291: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg292: 
tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg293: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg294: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg295: tensor<3200xf32> {ttir.name = "model.layers.0.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg296: tensor<3200x8640xf32> {ttir.name = "model.layers.0.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg297: tensor<3200x8640xf32> {ttir.name = "model.layers.0.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg298: tensor<8640x3200xf32> {ttir.name = "model.layers.0.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg299: tensor<3200xf32> {ttir.name = "model.layers.1.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg300: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg301: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg302: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg303: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg304: tensor<3200xf32> {ttir.name = "model.layers.1.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg305: tensor<3200x8640xf32> {ttir.name = "model.layers.1.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg306: tensor<3200x8640xf32> {ttir.name = "model.layers.1.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg307: tensor<8640x3200xf32> {ttir.name = "model.layers.1.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg308: tensor<3200xf32> {ttir.name = "model.layers.2.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg309: tensor<3200x3200xf32> {ttir.name = 
"model.layers.2.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg310: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg311: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg312: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg313: tensor<3200xf32> {ttir.name = "model.layers.2.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg314: tensor<3200x8640xf32> {ttir.name = "model.layers.2.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg315: tensor<3200x8640xf32> {ttir.name = "model.layers.2.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg316: tensor<8640x3200xf32> {ttir.name = "model.layers.2.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg317: tensor<3200xf32> {ttir.name = "model.layers.3.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg318: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg319: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg320: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg321: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg322: tensor<3200xf32> {ttir.name = "model.layers.3.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg323: tensor<3200x8640xf32> {ttir.name = "model.layers.3.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg324: tensor<3200x8640xf32> {ttir.name = "model.layers.3.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg325: tensor<8640x3200xf32> {ttir.name = "model.layers.3.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg326: tensor<3200xf32> {ttir.name = "model.layers.4.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), 
%arg327: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg328: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg329: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg330: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg331: tensor<3200xf32> {ttir.name = "model.layers.4.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg332: tensor<3200x8640xf32> {ttir.name = "model.layers.4.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg333: tensor<3200x8640xf32> {ttir.name = "model.layers.4.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg334: tensor<8640x3200xf32> {ttir.name = "model.layers.4.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg335: tensor<3200xf32> {ttir.name = "model.layers.5.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg336: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg337: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg338: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg339: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg340: tensor<3200xf32> {ttir.name = "model.layers.5.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg341: tensor<3200x8640xf32> {ttir.name = "model.layers.5.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg342: tensor<3200x8640xf32> {ttir.name = "model.layers.5.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg343: tensor<8640x3200xf32> {ttir.name = "model.layers.5.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg344: tensor<3200xf32> {ttir.name = 
"model.layers.6.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg345: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg346: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg347: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg348: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg349: tensor<3200xf32> {ttir.name = "model.layers.6.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg350: tensor<3200x8640xf32> {ttir.name = "model.layers.6.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg351: tensor<3200x8640xf32> {ttir.name = "model.layers.6.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg352: tensor<8640x3200xf32> {ttir.name = "model.layers.6.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg353: tensor<3200xf32> {ttir.name = "model.layers.7.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg354: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg355: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg356: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg357: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg358: tensor<3200xf32> {ttir.name = "model.layers.7.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg359: tensor<3200x8640xf32> {ttir.name = "model.layers.7.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg360: tensor<3200x8640xf32> {ttir.name = "model.layers.7.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg361: tensor<8640x3200xf32> {ttir.name = "model.layers.7.mlp.down_proj.weight"} 
loc("LlamaForCausalLM":0:0), %arg362: tensor<3200xf32> {ttir.name = "model.layers.8.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg363: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg364: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg365: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg366: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg367: tensor<3200xf32> {ttir.name = "model.layers.8.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg368: tensor<3200x8640xf32> {ttir.name = "model.layers.8.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg369: tensor<3200x8640xf32> {ttir.name = "model.layers.8.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg370: tensor<8640x3200xf32> {ttir.name = "model.layers.8.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg371: tensor<3200xf32> {ttir.name = "model.layers.9.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg372: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg373: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg374: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg375: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg376: tensor<3200xf32> {ttir.name = "model.layers.9.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg377: tensor<3200x8640xf32> {ttir.name = "model.layers.9.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg378: tensor<3200x8640xf32> {ttir.name = "model.layers.9.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg379: tensor<8640x3200xf32> 
{ttir.name = "model.layers.9.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg380: tensor<3200xf32> {ttir.name = "model.layers.10.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg381: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg382: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg383: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg384: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg385: tensor<3200xf32> {ttir.name = "model.layers.10.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg386: tensor<3200x8640xf32> {ttir.name = "model.layers.10.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg387: tensor<3200x8640xf32> {ttir.name = "model.layers.10.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg388: tensor<8640x3200xf32> {ttir.name = "model.layers.10.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg389: tensor<3200xf32> {ttir.name = "model.layers.11.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg390: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg391: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg392: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg393: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg394: tensor<3200xf32> {ttir.name = "model.layers.11.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg395: tensor<3200x8640xf32> {ttir.name = "model.layers.11.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg396: tensor<3200x8640xf32> {ttir.name = "model.layers.11.mlp.up_proj.weight"} 
loc("LlamaForCausalLM":0:0), %arg397: tensor<8640x3200xf32> {ttir.name = "model.layers.11.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg398: tensor<3200xf32> {ttir.name = "model.layers.12.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg399: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg400: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg401: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg402: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg403: tensor<3200xf32> {ttir.name = "model.layers.12.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg404: tensor<3200x8640xf32> {ttir.name = "model.layers.12.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg405: tensor<3200x8640xf32> {ttir.name = "model.layers.12.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg406: tensor<8640x3200xf32> {ttir.name = "model.layers.12.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg407: tensor<3200xf32> {ttir.name = "model.layers.13.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg408: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg409: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg410: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg411: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg412: tensor<3200xf32> {ttir.name = "model.layers.13.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg413: tensor<3200x8640xf32> {ttir.name = "model.layers.13.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg414: 
tensor<3200x8640xf32> {ttir.name = "model.layers.13.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg415: tensor<8640x3200xf32> {ttir.name = "model.layers.13.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg416: tensor<3200xf32> {ttir.name = "model.layers.14.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg417: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg418: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg419: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg420: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg421: tensor<3200xf32> {ttir.name = "model.layers.14.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg422: tensor<3200x8640xf32> {ttir.name = "model.layers.14.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg423: tensor<3200x8640xf32> {ttir.name = "model.layers.14.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg424: tensor<8640x3200xf32> {ttir.name = "model.layers.14.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg425: tensor<3200xf32> {ttir.name = "model.layers.15.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg426: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg427: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg428: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg429: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg430: tensor<3200xf32> {ttir.name = "model.layers.15.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg431: tensor<3200x8640xf32> {ttir.name = 
"model.layers.15.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg432: tensor<3200x8640xf32> {ttir.name = "model.layers.15.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg433: tensor<8640x3200xf32> {ttir.name = "model.layers.15.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg434: tensor<3200xf32> {ttir.name = "model.layers.16.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg435: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg436: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg437: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg438: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg439: tensor<3200xf32> {ttir.name = "model.layers.16.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg440: tensor<3200x8640xf32> {ttir.name = "model.layers.16.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg441: tensor<3200x8640xf32> {ttir.name = "model.layers.16.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg442: tensor<8640x3200xf32> {ttir.name = "model.layers.16.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg443: tensor<3200xf32> {ttir.name = "model.layers.17.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg444: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg445: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg446: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg447: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg448: tensor<3200xf32> {ttir.name = "model.layers.17.post_attention_layernorm.weight"} 
loc("LlamaForCausalLM":0:0), %arg449: tensor<3200x8640xf32> {ttir.name = "model.layers.17.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg450: tensor<3200x8640xf32> {ttir.name = "model.layers.17.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg451: tensor<8640x3200xf32> {ttir.name = "model.layers.17.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg452: tensor<3200xf32> {ttir.name = "model.layers.18.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg453: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg454: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg455: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg456: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg457: tensor<3200xf32> {ttir.name = "model.layers.18.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg458: tensor<3200x8640xf32> {ttir.name = "model.layers.18.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg459: tensor<3200x8640xf32> {ttir.name = "model.layers.18.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg460: tensor<8640x3200xf32> {ttir.name = "model.layers.18.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg461: tensor<3200xf32> {ttir.name = "model.layers.19.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg462: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg463: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg464: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg465: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg466: tensor<3200xf32> 
{ttir.name = "model.layers.19.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg467: tensor<3200x8640xf32> {ttir.name = "model.layers.19.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg468: tensor<3200x8640xf32> {ttir.name = "model.layers.19.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg469: tensor<8640x3200xf32> {ttir.name = "model.layers.19.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg470: tensor<3200xf32> {ttir.name = "model.layers.20.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg471: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg472: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg473: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg474: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg475: tensor<3200xf32> {ttir.name = "model.layers.20.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg476: tensor<3200x8640xf32> {ttir.name = "model.layers.20.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg477: tensor<3200x8640xf32> {ttir.name = "model.layers.20.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg478: tensor<8640x3200xf32> {ttir.name = "model.layers.20.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg479: tensor<3200xf32> {ttir.name = "model.layers.21.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg480: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg481: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg482: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg483: tensor<3200x3200xf32> {ttir.name = 
"model.layers.21.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg484: tensor<3200xf32> {ttir.name = "model.layers.21.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg485: tensor<3200x8640xf32> {ttir.name = "model.layers.21.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg486: tensor<3200x8640xf32> {ttir.name = "model.layers.21.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg487: tensor<8640x3200xf32> {ttir.name = "model.layers.21.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg488: tensor<3200xf32> {ttir.name = "model.layers.22.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg489: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg490: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg491: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg492: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg493: tensor<3200xf32> {ttir.name = "model.layers.22.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg494: tensor<3200x8640xf32> {ttir.name = "model.layers.22.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg495: tensor<3200x8640xf32> {ttir.name = "model.layers.22.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg496: tensor<8640x3200xf32> {ttir.name = "model.layers.22.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg497: tensor<3200xf32> {ttir.name = "model.layers.23.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg498: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg499: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg500: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.v_proj.weight"} 
loc("LlamaForCausalLM":0:0), %arg501: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg502: tensor<3200xf32> {ttir.name = "model.layers.23.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg503: tensor<3200x8640xf32> {ttir.name = "model.layers.23.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg504: tensor<3200x8640xf32> {ttir.name = "model.layers.23.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg505: tensor<8640x3200xf32> {ttir.name = "model.layers.23.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg506: tensor<3200xf32> {ttir.name = "model.layers.24.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg507: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg508: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg509: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg510: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg511: tensor<3200xf32> {ttir.name = "model.layers.24.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg512: tensor<3200x8640xf32> {ttir.name = "model.layers.24.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg513: tensor<3200x8640xf32> {ttir.name = "model.layers.24.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg514: tensor<8640x3200xf32> {ttir.name = "model.layers.24.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg515: tensor<3200xf32> {ttir.name = "model.layers.25.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg516: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg517: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg518: 
tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg519: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg520: tensor<3200xf32> {ttir.name = "model.layers.25.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg521: tensor<3200x8640xf32> {ttir.name = "model.layers.25.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg522: tensor<3200x8640xf32> {ttir.name = "model.layers.25.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg523: tensor<8640x3200xf32> {ttir.name = "model.layers.25.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg524: tensor<3200x32000xf32> {ttir.name = "lm_head.weight"} loc("LlamaForCausalLM":0:0)) -> (tensor<1x12x3200xf32> {ttir.name = "LlamaForCausalLM.output_matmul_2246"}) {
     %0 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2091)
     %1 = "ttir.embedding"(%arg0, %arg289, %0) <{operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12xi32>, tensor<32000x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2091)
diff --git a/tools/explorer/test/models/open_llama_3b_ttir.mlir b/tools/explorer/test/models/open_llama_3b_ttir.mlir
new file mode 100644
index 000000000..231432712
--- /dev/null
+++ b/tools/explorer/test/models/open_llama_3b_ttir.mlir
@@ -0,0 +1,8364 @@
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>
+#loc = loc("LlamaForCausalLM":0:0)
+module @LlamaForCausalLM attributes {} {
+  func.func @forward(%arg0: tensor<1x12xi32> {ttir.name = "input_1"} loc("LlamaForCausalLM":0:0), %arg1: tensor<1xf32> {ttir.name = "input_1_add_4"} loc("LlamaForCausalLM":0:0), %arg2: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_14"} loc("LlamaForCausalLM":0:0), %arg3: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_24.1"} loc("LlamaForCausalLM":0:0), %arg4: tensor<1xf32> {ttir.name = "input_1_multiply_25"} loc("LlamaForCausalLM":0:0), %arg5: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_26.1"} loc("LlamaForCausalLM":0:0), %arg6: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_38.1"} loc("LlamaForCausalLM":0:0), %arg7: tensor<1xf32> {ttir.name = "input_1_multiply_39"} loc("LlamaForCausalLM":0:0), %arg8: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_40.1"} loc("LlamaForCausalLM":0:0), %arg9: tensor<1xf32> {ttir.name = "input_1_multiply_48"} loc("LlamaForCausalLM":0:0), %arg10: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_49"} loc("LlamaForCausalLM":0:0), %arg11: tensor<1xf32> {ttir.name = "input_1_add_70"} loc("LlamaForCausalLM":0:0), %arg12: tensor<1xf32> {ttir.name = "input_1_add_90"} loc("LlamaForCausalLM":0:0), %arg13: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_100"} loc("LlamaForCausalLM":0:0), %arg14: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_110.1"} loc("LlamaForCausalLM":0:0), %arg15: tensor<1xf32> {ttir.name = "input_1_multiply_111"} loc("LlamaForCausalLM":0:0), %arg16: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_112.1"} loc("LlamaForCausalLM":0:0), %arg17: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_124.1"} loc("LlamaForCausalLM":0:0), %arg18: tensor<1xf32> {ttir.name = "input_1_multiply_125"} loc("LlamaForCausalLM":0:0), %arg19: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_126.1"} loc("LlamaForCausalLM":0:0), %arg20: tensor<1xf32> {ttir.name = "input_1_multiply_134"} loc("LlamaForCausalLM":0:0), %arg21: 
tensor<1x1x12x12xf32> {ttir.name = "input_1_add_135"} loc("LlamaForCausalLM":0:0), %arg22: tensor<1xf32> {ttir.name = "input_1_add_156"} loc("LlamaForCausalLM":0:0), %arg23: tensor<1xf32> {ttir.name = "input_1_add_176"} loc("LlamaForCausalLM":0:0), %arg24: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_186"} loc("LlamaForCausalLM":0:0), %arg25: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_196.1"} loc("LlamaForCausalLM":0:0), %arg26: tensor<1xf32> {ttir.name = "input_1_multiply_197"} loc("LlamaForCausalLM":0:0), %arg27: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_198.1"} loc("LlamaForCausalLM":0:0), %arg28: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_210.1"} loc("LlamaForCausalLM":0:0), %arg29: tensor<1xf32> {ttir.name = "input_1_multiply_211"} loc("LlamaForCausalLM":0:0), %arg30: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_212.1"} loc("LlamaForCausalLM":0:0), %arg31: tensor<1xf32> {ttir.name = "input_1_multiply_220"} loc("LlamaForCausalLM":0:0), %arg32: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_221"} loc("LlamaForCausalLM":0:0), %arg33: tensor<1xf32> {ttir.name = "input_1_add_242"} loc("LlamaForCausalLM":0:0), %arg34: tensor<1xf32> {ttir.name = "input_1_add_262"} loc("LlamaForCausalLM":0:0), %arg35: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_272"} loc("LlamaForCausalLM":0:0), %arg36: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_282.1"} loc("LlamaForCausalLM":0:0), %arg37: tensor<1xf32> {ttir.name = "input_1_multiply_283"} loc("LlamaForCausalLM":0:0), %arg38: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_284.1"} loc("LlamaForCausalLM":0:0), %arg39: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_296.1"} loc("LlamaForCausalLM":0:0), %arg40: tensor<1xf32> {ttir.name = "input_1_multiply_297"} loc("LlamaForCausalLM":0:0), %arg41: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_298.1"} loc("LlamaForCausalLM":0:0), %arg42: tensor<1xf32> 
{ttir.name = "input_1_multiply_306"} loc("LlamaForCausalLM":0:0), %arg43: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_307"} loc("LlamaForCausalLM":0:0), %arg44: tensor<1xf32> {ttir.name = "input_1_add_328"} loc("LlamaForCausalLM":0:0), %arg45: tensor<1xf32> {ttir.name = "input_1_add_348"} loc("LlamaForCausalLM":0:0), %arg46: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_358"} loc("LlamaForCausalLM":0:0), %arg47: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_368.1"} loc("LlamaForCausalLM":0:0), %arg48: tensor<1xf32> {ttir.name = "input_1_multiply_369"} loc("LlamaForCausalLM":0:0), %arg49: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_370.1"} loc("LlamaForCausalLM":0:0), %arg50: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_382.1"} loc("LlamaForCausalLM":0:0), %arg51: tensor<1xf32> {ttir.name = "input_1_multiply_383"} loc("LlamaForCausalLM":0:0), %arg52: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_384.1"} loc("LlamaForCausalLM":0:0), %arg53: tensor<1xf32> {ttir.name = "input_1_multiply_392"} loc("LlamaForCausalLM":0:0), %arg54: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_393"} loc("LlamaForCausalLM":0:0), %arg55: tensor<1xf32> {ttir.name = "input_1_add_414"} loc("LlamaForCausalLM":0:0), %arg56: tensor<1xf32> {ttir.name = "input_1_add_434"} loc("LlamaForCausalLM":0:0), %arg57: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_444"} loc("LlamaForCausalLM":0:0), %arg58: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_454.1"} loc("LlamaForCausalLM":0:0), %arg59: tensor<1xf32> {ttir.name = "input_1_multiply_455"} loc("LlamaForCausalLM":0:0), %arg60: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_456.1"} loc("LlamaForCausalLM":0:0), %arg61: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_468.1"} loc("LlamaForCausalLM":0:0), %arg62: tensor<1xf32> {ttir.name = "input_1_multiply_469"} loc("LlamaForCausalLM":0:0), %arg63: tensor<1x32x50x100xf32> {ttir.name = 
"dc.input_tensor.index_470.1"} loc("LlamaForCausalLM":0:0), %arg64: tensor<1xf32> {ttir.name = "input_1_multiply_478"} loc("LlamaForCausalLM":0:0), %arg65: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_479"} loc("LlamaForCausalLM":0:0), %arg66: tensor<1xf32> {ttir.name = "input_1_add_500"} loc("LlamaForCausalLM":0:0), %arg67: tensor<1xf32> {ttir.name = "input_1_add_520"} loc("LlamaForCausalLM":0:0), %arg68: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_530"} loc("LlamaForCausalLM":0:0), %arg69: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_540.1"} loc("LlamaForCausalLM":0:0), %arg70: tensor<1xf32> {ttir.name = "input_1_multiply_541"} loc("LlamaForCausalLM":0:0), %arg71: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_542.1"} loc("LlamaForCausalLM":0:0), %arg72: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_554.1"} loc("LlamaForCausalLM":0:0), %arg73: tensor<1xf32> {ttir.name = "input_1_multiply_555"} loc("LlamaForCausalLM":0:0), %arg74: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_556.1"} loc("LlamaForCausalLM":0:0), %arg75: tensor<1xf32> {ttir.name = "input_1_multiply_564"} loc("LlamaForCausalLM":0:0), %arg76: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_565"} loc("LlamaForCausalLM":0:0), %arg77: tensor<1xf32> {ttir.name = "input_1_add_586"} loc("LlamaForCausalLM":0:0), %arg78: tensor<1xf32> {ttir.name = "input_1_add_606"} loc("LlamaForCausalLM":0:0), %arg79: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_616"} loc("LlamaForCausalLM":0:0), %arg80: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_626.1"} loc("LlamaForCausalLM":0:0), %arg81: tensor<1xf32> {ttir.name = "input_1_multiply_627"} loc("LlamaForCausalLM":0:0), %arg82: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_628.1"} loc("LlamaForCausalLM":0:0), %arg83: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_640.1"} loc("LlamaForCausalLM":0:0), %arg84: tensor<1xf32> {ttir.name = "input_1_multiply_641"} 
loc("LlamaForCausalLM":0:0), %arg85: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_642.1"} loc("LlamaForCausalLM":0:0), %arg86: tensor<1xf32> {ttir.name = "input_1_multiply_650"} loc("LlamaForCausalLM":0:0), %arg87: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_651"} loc("LlamaForCausalLM":0:0), %arg88: tensor<1xf32> {ttir.name = "input_1_add_672"} loc("LlamaForCausalLM":0:0), %arg89: tensor<1xf32> {ttir.name = "input_1_add_692"} loc("LlamaForCausalLM":0:0), %arg90: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_702"} loc("LlamaForCausalLM":0:0), %arg91: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_712.1"} loc("LlamaForCausalLM":0:0), %arg92: tensor<1xf32> {ttir.name = "input_1_multiply_713"} loc("LlamaForCausalLM":0:0), %arg93: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_714.1"} loc("LlamaForCausalLM":0:0), %arg94: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_726.1"} loc("LlamaForCausalLM":0:0), %arg95: tensor<1xf32> {ttir.name = "input_1_multiply_727"} loc("LlamaForCausalLM":0:0), %arg96: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_728.1"} loc("LlamaForCausalLM":0:0), %arg97: tensor<1xf32> {ttir.name = "input_1_multiply_736"} loc("LlamaForCausalLM":0:0), %arg98: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_737"} loc("LlamaForCausalLM":0:0), %arg99: tensor<1xf32> {ttir.name = "input_1_add_758"} loc("LlamaForCausalLM":0:0), %arg100: tensor<1xf32> {ttir.name = "input_1_add_778"} loc("LlamaForCausalLM":0:0), %arg101: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_788"} loc("LlamaForCausalLM":0:0), %arg102: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_798.1"} loc("LlamaForCausalLM":0:0), %arg103: tensor<1xf32> {ttir.name = "input_1_multiply_799"} loc("LlamaForCausalLM":0:0), %arg104: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_800.1"} loc("LlamaForCausalLM":0:0), %arg105: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_812.1"} 
loc("LlamaForCausalLM":0:0), %arg106: tensor<1xf32> {ttir.name = "input_1_multiply_813"} loc("LlamaForCausalLM":0:0), %arg107: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_814.1"} loc("LlamaForCausalLM":0:0), %arg108: tensor<1xf32> {ttir.name = "input_1_multiply_822"} loc("LlamaForCausalLM":0:0), %arg109: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_823"} loc("LlamaForCausalLM":0:0), %arg110: tensor<1xf32> {ttir.name = "input_1_add_844"} loc("LlamaForCausalLM":0:0), %arg111: tensor<1xf32> {ttir.name = "input_1_add_864"} loc("LlamaForCausalLM":0:0), %arg112: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_874"} loc("LlamaForCausalLM":0:0), %arg113: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_884.1"} loc("LlamaForCausalLM":0:0), %arg114: tensor<1xf32> {ttir.name = "input_1_multiply_885"} loc("LlamaForCausalLM":0:0), %arg115: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_886.1"} loc("LlamaForCausalLM":0:0), %arg116: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_898.1"} loc("LlamaForCausalLM":0:0), %arg117: tensor<1xf32> {ttir.name = "input_1_multiply_899"} loc("LlamaForCausalLM":0:0), %arg118: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_900.1"} loc("LlamaForCausalLM":0:0), %arg119: tensor<1xf32> {ttir.name = "input_1_multiply_908"} loc("LlamaForCausalLM":0:0), %arg120: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_909"} loc("LlamaForCausalLM":0:0), %arg121: tensor<1xf32> {ttir.name = "input_1_add_930"} loc("LlamaForCausalLM":0:0), %arg122: tensor<1xf32> {ttir.name = "input_1_add_950"} loc("LlamaForCausalLM":0:0), %arg123: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_960"} loc("LlamaForCausalLM":0:0), %arg124: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_970.1"} loc("LlamaForCausalLM":0:0), %arg125: tensor<1xf32> {ttir.name = "input_1_multiply_971"} loc("LlamaForCausalLM":0:0), %arg126: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_972.1"} 
loc("LlamaForCausalLM":0:0), %arg127: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_984.1"} loc("LlamaForCausalLM":0:0), %arg128: tensor<1xf32> {ttir.name = "input_1_multiply_985"} loc("LlamaForCausalLM":0:0), %arg129: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_986.1"} loc("LlamaForCausalLM":0:0), %arg130: tensor<1xf32> {ttir.name = "input_1_multiply_994"} loc("LlamaForCausalLM":0:0), %arg131: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_995"} loc("LlamaForCausalLM":0:0), %arg132: tensor<1xf32> {ttir.name = "input_1_add_1016"} loc("LlamaForCausalLM":0:0), %arg133: tensor<1xf32> {ttir.name = "input_1_add_1036"} loc("LlamaForCausalLM":0:0), %arg134: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1046"} loc("LlamaForCausalLM":0:0), %arg135: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1056.1"} loc("LlamaForCausalLM":0:0), %arg136: tensor<1xf32> {ttir.name = "input_1_multiply_1057"} loc("LlamaForCausalLM":0:0), %arg137: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1058.1"} loc("LlamaForCausalLM":0:0), %arg138: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1070.1"} loc("LlamaForCausalLM":0:0), %arg139: tensor<1xf32> {ttir.name = "input_1_multiply_1071"} loc("LlamaForCausalLM":0:0), %arg140: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1072.1"} loc("LlamaForCausalLM":0:0), %arg141: tensor<1xf32> {ttir.name = "input_1_multiply_1080"} loc("LlamaForCausalLM":0:0), %arg142: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1081"} loc("LlamaForCausalLM":0:0), %arg143: tensor<1xf32> {ttir.name = "input_1_add_1102"} loc("LlamaForCausalLM":0:0), %arg144: tensor<1xf32> {ttir.name = "input_1_add_1122"} loc("LlamaForCausalLM":0:0), %arg145: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1132"} loc("LlamaForCausalLM":0:0), %arg146: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1142.1"} loc("LlamaForCausalLM":0:0), %arg147: tensor<1xf32> {ttir.name = 
"input_1_multiply_1143"} loc("LlamaForCausalLM":0:0), %arg148: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1144.1"} loc("LlamaForCausalLM":0:0), %arg149: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1156.1"} loc("LlamaForCausalLM":0:0), %arg150: tensor<1xf32> {ttir.name = "input_1_multiply_1157"} loc("LlamaForCausalLM":0:0), %arg151: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1158.1"} loc("LlamaForCausalLM":0:0), %arg152: tensor<1xf32> {ttir.name = "input_1_multiply_1166"} loc("LlamaForCausalLM":0:0), %arg153: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1167"} loc("LlamaForCausalLM":0:0), %arg154: tensor<1xf32> {ttir.name = "input_1_add_1188"} loc("LlamaForCausalLM":0:0), %arg155: tensor<1xf32> {ttir.name = "input_1_add_1208"} loc("LlamaForCausalLM":0:0), %arg156: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1218"} loc("LlamaForCausalLM":0:0), %arg157: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1228.1"} loc("LlamaForCausalLM":0:0), %arg158: tensor<1xf32> {ttir.name = "input_1_multiply_1229"} loc("LlamaForCausalLM":0:0), %arg159: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1230.1"} loc("LlamaForCausalLM":0:0), %arg160: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1242.1"} loc("LlamaForCausalLM":0:0), %arg161: tensor<1xf32> {ttir.name = "input_1_multiply_1243"} loc("LlamaForCausalLM":0:0), %arg162: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1244.1"} loc("LlamaForCausalLM":0:0), %arg163: tensor<1xf32> {ttir.name = "input_1_multiply_1252"} loc("LlamaForCausalLM":0:0), %arg164: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1253"} loc("LlamaForCausalLM":0:0), %arg165: tensor<1xf32> {ttir.name = "input_1_add_1274"} loc("LlamaForCausalLM":0:0), %arg166: tensor<1xf32> {ttir.name = "input_1_add_1294"} loc("LlamaForCausalLM":0:0), %arg167: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1304"} loc("LlamaForCausalLM":0:0), %arg168: 
tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1314.1"} loc("LlamaForCausalLM":0:0), %arg169: tensor<1xf32> {ttir.name = "input_1_multiply_1315"} loc("LlamaForCausalLM":0:0), %arg170: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1316.1"} loc("LlamaForCausalLM":0:0), %arg171: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1328.1"} loc("LlamaForCausalLM":0:0), %arg172: tensor<1xf32> {ttir.name = "input_1_multiply_1329"} loc("LlamaForCausalLM":0:0), %arg173: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1330.1"} loc("LlamaForCausalLM":0:0), %arg174: tensor<1xf32> {ttir.name = "input_1_multiply_1338"} loc("LlamaForCausalLM":0:0), %arg175: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1339"} loc("LlamaForCausalLM":0:0), %arg176: tensor<1xf32> {ttir.name = "input_1_add_1360"} loc("LlamaForCausalLM":0:0), %arg177: tensor<1xf32> {ttir.name = "input_1_add_1380"} loc("LlamaForCausalLM":0:0), %arg178: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1390"} loc("LlamaForCausalLM":0:0), %arg179: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1400.1"} loc("LlamaForCausalLM":0:0), %arg180: tensor<1xf32> {ttir.name = "input_1_multiply_1401"} loc("LlamaForCausalLM":0:0), %arg181: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1402.1"} loc("LlamaForCausalLM":0:0), %arg182: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1414.1"} loc("LlamaForCausalLM":0:0), %arg183: tensor<1xf32> {ttir.name = "input_1_multiply_1415"} loc("LlamaForCausalLM":0:0), %arg184: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1416.1"} loc("LlamaForCausalLM":0:0), %arg185: tensor<1xf32> {ttir.name = "input_1_multiply_1424"} loc("LlamaForCausalLM":0:0), %arg186: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1425"} loc("LlamaForCausalLM":0:0), %arg187: tensor<1xf32> {ttir.name = "input_1_add_1446"} loc("LlamaForCausalLM":0:0), %arg188: tensor<1xf32> {ttir.name = "input_1_add_1466"} 
loc("LlamaForCausalLM":0:0), %arg189: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1476"} loc("LlamaForCausalLM":0:0), %arg190: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1486.1"} loc("LlamaForCausalLM":0:0), %arg191: tensor<1xf32> {ttir.name = "input_1_multiply_1487"} loc("LlamaForCausalLM":0:0), %arg192: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1488.1"} loc("LlamaForCausalLM":0:0), %arg193: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1500.1"} loc("LlamaForCausalLM":0:0), %arg194: tensor<1xf32> {ttir.name = "input_1_multiply_1501"} loc("LlamaForCausalLM":0:0), %arg195: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1502.1"} loc("LlamaForCausalLM":0:0), %arg196: tensor<1xf32> {ttir.name = "input_1_multiply_1510"} loc("LlamaForCausalLM":0:0), %arg197: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1511"} loc("LlamaForCausalLM":0:0), %arg198: tensor<1xf32> {ttir.name = "input_1_add_1532"} loc("LlamaForCausalLM":0:0), %arg199: tensor<1xf32> {ttir.name = "input_1_add_1552"} loc("LlamaForCausalLM":0:0), %arg200: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1562"} loc("LlamaForCausalLM":0:0), %arg201: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1572.1"} loc("LlamaForCausalLM":0:0), %arg202: tensor<1xf32> {ttir.name = "input_1_multiply_1573"} loc("LlamaForCausalLM":0:0), %arg203: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1574.1"} loc("LlamaForCausalLM":0:0), %arg204: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1586.1"} loc("LlamaForCausalLM":0:0), %arg205: tensor<1xf32> {ttir.name = "input_1_multiply_1587"} loc("LlamaForCausalLM":0:0), %arg206: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1588.1"} loc("LlamaForCausalLM":0:0), %arg207: tensor<1xf32> {ttir.name = "input_1_multiply_1596"} loc("LlamaForCausalLM":0:0), %arg208: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1597"} loc("LlamaForCausalLM":0:0), %arg209: tensor<1xf32> 
{ttir.name = "input_1_add_1618"} loc("LlamaForCausalLM":0:0), %arg210: tensor<1xf32> {ttir.name = "input_1_add_1638"} loc("LlamaForCausalLM":0:0), %arg211: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1648"} loc("LlamaForCausalLM":0:0), %arg212: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1658.1"} loc("LlamaForCausalLM":0:0), %arg213: tensor<1xf32> {ttir.name = "input_1_multiply_1659"} loc("LlamaForCausalLM":0:0), %arg214: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1660.1"} loc("LlamaForCausalLM":0:0), %arg215: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1672.1"} loc("LlamaForCausalLM":0:0), %arg216: tensor<1xf32> {ttir.name = "input_1_multiply_1673"} loc("LlamaForCausalLM":0:0), %arg217: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1674.1"} loc("LlamaForCausalLM":0:0), %arg218: tensor<1xf32> {ttir.name = "input_1_multiply_1682"} loc("LlamaForCausalLM":0:0), %arg219: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1683"} loc("LlamaForCausalLM":0:0), %arg220: tensor<1xf32> {ttir.name = "input_1_add_1704"} loc("LlamaForCausalLM":0:0), %arg221: tensor<1xf32> {ttir.name = "input_1_add_1724"} loc("LlamaForCausalLM":0:0), %arg222: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1734"} loc("LlamaForCausalLM":0:0), %arg223: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1744.1"} loc("LlamaForCausalLM":0:0), %arg224: tensor<1xf32> {ttir.name = "input_1_multiply_1745"} loc("LlamaForCausalLM":0:0), %arg225: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1746.1"} loc("LlamaForCausalLM":0:0), %arg226: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1758.1"} loc("LlamaForCausalLM":0:0), %arg227: tensor<1xf32> {ttir.name = "input_1_multiply_1759"} loc("LlamaForCausalLM":0:0), %arg228: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1760.1"} loc("LlamaForCausalLM":0:0), %arg229: tensor<1xf32> {ttir.name = "input_1_multiply_1768"} loc("LlamaForCausalLM":0:0), 
%arg230: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1769"} loc("LlamaForCausalLM":0:0), %arg231: tensor<1xf32> {ttir.name = "input_1_add_1790"} loc("LlamaForCausalLM":0:0), %arg232: tensor<1xf32> {ttir.name = "input_1_add_1810"} loc("LlamaForCausalLM":0:0), %arg233: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1820"} loc("LlamaForCausalLM":0:0), %arg234: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1830.1"} loc("LlamaForCausalLM":0:0), %arg235: tensor<1xf32> {ttir.name = "input_1_multiply_1831"} loc("LlamaForCausalLM":0:0), %arg236: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1832.1"} loc("LlamaForCausalLM":0:0), %arg237: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1844.1"} loc("LlamaForCausalLM":0:0), %arg238: tensor<1xf32> {ttir.name = "input_1_multiply_1845"} loc("LlamaForCausalLM":0:0), %arg239: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1846.1"} loc("LlamaForCausalLM":0:0), %arg240: tensor<1xf32> {ttir.name = "input_1_multiply_1854"} loc("LlamaForCausalLM":0:0), %arg241: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1855"} loc("LlamaForCausalLM":0:0), %arg242: tensor<1xf32> {ttir.name = "input_1_add_1876"} loc("LlamaForCausalLM":0:0), %arg243: tensor<1xf32> {ttir.name = "input_1_add_1896"} loc("LlamaForCausalLM":0:0), %arg244: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1906"} loc("LlamaForCausalLM":0:0), %arg245: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1916.1"} loc("LlamaForCausalLM":0:0), %arg246: tensor<1xf32> {ttir.name = "input_1_multiply_1917"} loc("LlamaForCausalLM":0:0), %arg247: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1918.1"} loc("LlamaForCausalLM":0:0), %arg248: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1930.1"} loc("LlamaForCausalLM":0:0), %arg249: tensor<1xf32> {ttir.name = "input_1_multiply_1931"} loc("LlamaForCausalLM":0:0), %arg250: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1932.1"} 
loc("LlamaForCausalLM":0:0), %arg251: tensor<1xf32> {ttir.name = "input_1_multiply_1940"} loc("LlamaForCausalLM":0:0), %arg252: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1941"} loc("LlamaForCausalLM":0:0), %arg253: tensor<1xf32> {ttir.name = "input_1_add_1962"} loc("LlamaForCausalLM":0:0), %arg254: tensor<1xf32> {ttir.name = "input_1_add_1982"} loc("LlamaForCausalLM":0:0), %arg255: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1992"} loc("LlamaForCausalLM":0:0), %arg256: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2002.1"} loc("LlamaForCausalLM":0:0), %arg257: tensor<1xf32> {ttir.name = "input_1_multiply_2003"} loc("LlamaForCausalLM":0:0), %arg258: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2004.1"} loc("LlamaForCausalLM":0:0), %arg259: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2016.1"} loc("LlamaForCausalLM":0:0), %arg260: tensor<1xf32> {ttir.name = "input_1_multiply_2017"} loc("LlamaForCausalLM":0:0), %arg261: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2018.1"} loc("LlamaForCausalLM":0:0), %arg262: tensor<1xf32> {ttir.name = "input_1_multiply_2026"} loc("LlamaForCausalLM":0:0), %arg263: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2027"} loc("LlamaForCausalLM":0:0), %arg264: tensor<1xf32> {ttir.name = "input_1_add_2048"} loc("LlamaForCausalLM":0:0), %arg265: tensor<1xf32> {ttir.name = "input_1_add_2068"} loc("LlamaForCausalLM":0:0), %arg266: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_2078"} loc("LlamaForCausalLM":0:0), %arg267: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2088.1"} loc("LlamaForCausalLM":0:0), %arg268: tensor<1xf32> {ttir.name = "input_1_multiply_2089"} loc("LlamaForCausalLM":0:0), %arg269: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2090.1"} loc("LlamaForCausalLM":0:0), %arg270: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2102.1"} loc("LlamaForCausalLM":0:0), %arg271: tensor<1xf32> {ttir.name = 
"input_1_multiply_2103"} loc("LlamaForCausalLM":0:0), %arg272: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2104.1"} loc("LlamaForCausalLM":0:0), %arg273: tensor<1xf32> {ttir.name = "input_1_multiply_2112"} loc("LlamaForCausalLM":0:0), %arg274: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2113"} loc("LlamaForCausalLM":0:0), %arg275: tensor<1xf32> {ttir.name = "input_1_add_2134"} loc("LlamaForCausalLM":0:0), %arg276: tensor<1xf32> {ttir.name = "input_1_add_2154"} loc("LlamaForCausalLM":0:0), %arg277: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_2164"} loc("LlamaForCausalLM":0:0), %arg278: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2174.1"} loc("LlamaForCausalLM":0:0), %arg279: tensor<1xf32> {ttir.name = "input_1_multiply_2175"} loc("LlamaForCausalLM":0:0), %arg280: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2176.1"} loc("LlamaForCausalLM":0:0), %arg281: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2188.1"} loc("LlamaForCausalLM":0:0), %arg282: tensor<1xf32> {ttir.name = "input_1_multiply_2189"} loc("LlamaForCausalLM":0:0), %arg283: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2190.1"} loc("LlamaForCausalLM":0:0), %arg284: tensor<1xf32> {ttir.name = "input_1_multiply_2198"} loc("LlamaForCausalLM":0:0), %arg285: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2199"} loc("LlamaForCausalLM":0:0), %arg286: tensor<1xf32> {ttir.name = "input_1_add_2220"} loc("LlamaForCausalLM":0:0), %arg287: tensor<1xf32> {ttir.name = "input_1_add_2240"} loc("LlamaForCausalLM":0:0), %arg288: tensor<3200xf32> {ttir.name = "model.norm.weight"} loc("LlamaForCausalLM":0:0), %arg289: tensor<32000x3200xf32> {ttir.name = "model.embed_tokens.weight"} loc("LlamaForCausalLM":0:0), %arg290: tensor<3200xf32> {ttir.name = "model.layers.0.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg291: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg292: 
tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg293: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg294: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg295: tensor<3200xf32> {ttir.name = "model.layers.0.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg296: tensor<3200x8640xf32> {ttir.name = "model.layers.0.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg297: tensor<3200x8640xf32> {ttir.name = "model.layers.0.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg298: tensor<8640x3200xf32> {ttir.name = "model.layers.0.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg299: tensor<3200xf32> {ttir.name = "model.layers.1.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg300: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg301: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg302: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg303: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg304: tensor<3200xf32> {ttir.name = "model.layers.1.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg305: tensor<3200x8640xf32> {ttir.name = "model.layers.1.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg306: tensor<3200x8640xf32> {ttir.name = "model.layers.1.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg307: tensor<8640x3200xf32> {ttir.name = "model.layers.1.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg308: tensor<3200xf32> {ttir.name = "model.layers.2.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg309: tensor<3200x3200xf32> {ttir.name = 
"model.layers.2.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg310: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg311: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg312: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg313: tensor<3200xf32> {ttir.name = "model.layers.2.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg314: tensor<3200x8640xf32> {ttir.name = "model.layers.2.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg315: tensor<3200x8640xf32> {ttir.name = "model.layers.2.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg316: tensor<8640x3200xf32> {ttir.name = "model.layers.2.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg317: tensor<3200xf32> {ttir.name = "model.layers.3.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg318: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg319: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg320: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg321: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg322: tensor<3200xf32> {ttir.name = "model.layers.3.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg323: tensor<3200x8640xf32> {ttir.name = "model.layers.3.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg324: tensor<3200x8640xf32> {ttir.name = "model.layers.3.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg325: tensor<8640x3200xf32> {ttir.name = "model.layers.3.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg326: tensor<3200xf32> {ttir.name = "model.layers.4.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), 
%arg327: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg328: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg329: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg330: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg331: tensor<3200xf32> {ttir.name = "model.layers.4.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg332: tensor<3200x8640xf32> {ttir.name = "model.layers.4.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg333: tensor<3200x8640xf32> {ttir.name = "model.layers.4.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg334: tensor<8640x3200xf32> {ttir.name = "model.layers.4.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg335: tensor<3200xf32> {ttir.name = "model.layers.5.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg336: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg337: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg338: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg339: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg340: tensor<3200xf32> {ttir.name = "model.layers.5.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg341: tensor<3200x8640xf32> {ttir.name = "model.layers.5.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg342: tensor<3200x8640xf32> {ttir.name = "model.layers.5.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg343: tensor<8640x3200xf32> {ttir.name = "model.layers.5.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg344: tensor<3200xf32> {ttir.name = 
"model.layers.6.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg345: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg346: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg347: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg348: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg349: tensor<3200xf32> {ttir.name = "model.layers.6.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg350: tensor<3200x8640xf32> {ttir.name = "model.layers.6.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg351: tensor<3200x8640xf32> {ttir.name = "model.layers.6.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg352: tensor<8640x3200xf32> {ttir.name = "model.layers.6.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg353: tensor<3200xf32> {ttir.name = "model.layers.7.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg354: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg355: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg356: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg357: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg358: tensor<3200xf32> {ttir.name = "model.layers.7.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg359: tensor<3200x8640xf32> {ttir.name = "model.layers.7.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg360: tensor<3200x8640xf32> {ttir.name = "model.layers.7.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg361: tensor<8640x3200xf32> {ttir.name = "model.layers.7.mlp.down_proj.weight"} 
loc("LlamaForCausalLM":0:0), %arg362: tensor<3200xf32> {ttir.name = "model.layers.8.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg363: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg364: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg365: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg366: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg367: tensor<3200xf32> {ttir.name = "model.layers.8.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg368: tensor<3200x8640xf32> {ttir.name = "model.layers.8.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg369: tensor<3200x8640xf32> {ttir.name = "model.layers.8.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg370: tensor<8640x3200xf32> {ttir.name = "model.layers.8.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg371: tensor<3200xf32> {ttir.name = "model.layers.9.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg372: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg373: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg374: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg375: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg376: tensor<3200xf32> {ttir.name = "model.layers.9.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg377: tensor<3200x8640xf32> {ttir.name = "model.layers.9.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg378: tensor<3200x8640xf32> {ttir.name = "model.layers.9.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg379: tensor<8640x3200xf32> 
{ttir.name = "model.layers.9.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg380: tensor<3200xf32> {ttir.name = "model.layers.10.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg381: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg382: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg383: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg384: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg385: tensor<3200xf32> {ttir.name = "model.layers.10.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg386: tensor<3200x8640xf32> {ttir.name = "model.layers.10.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg387: tensor<3200x8640xf32> {ttir.name = "model.layers.10.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg388: tensor<8640x3200xf32> {ttir.name = "model.layers.10.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg389: tensor<3200xf32> {ttir.name = "model.layers.11.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg390: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg391: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg392: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg393: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg394: tensor<3200xf32> {ttir.name = "model.layers.11.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg395: tensor<3200x8640xf32> {ttir.name = "model.layers.11.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg396: tensor<3200x8640xf32> {ttir.name = "model.layers.11.mlp.up_proj.weight"} 
loc("LlamaForCausalLM":0:0), %arg397: tensor<8640x3200xf32> {ttir.name = "model.layers.11.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg398: tensor<3200xf32> {ttir.name = "model.layers.12.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg399: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg400: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg401: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg402: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg403: tensor<3200xf32> {ttir.name = "model.layers.12.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg404: tensor<3200x8640xf32> {ttir.name = "model.layers.12.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg405: tensor<3200x8640xf32> {ttir.name = "model.layers.12.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg406: tensor<8640x3200xf32> {ttir.name = "model.layers.12.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg407: tensor<3200xf32> {ttir.name = "model.layers.13.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg408: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg409: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg410: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg411: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg412: tensor<3200xf32> {ttir.name = "model.layers.13.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg413: tensor<3200x8640xf32> {ttir.name = "model.layers.13.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg414: 
tensor<3200x8640xf32> {ttir.name = "model.layers.13.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg415: tensor<8640x3200xf32> {ttir.name = "model.layers.13.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg416: tensor<3200xf32> {ttir.name = "model.layers.14.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg417: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg418: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg419: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg420: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg421: tensor<3200xf32> {ttir.name = "model.layers.14.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg422: tensor<3200x8640xf32> {ttir.name = "model.layers.14.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg423: tensor<3200x8640xf32> {ttir.name = "model.layers.14.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg424: tensor<8640x3200xf32> {ttir.name = "model.layers.14.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg425: tensor<3200xf32> {ttir.name = "model.layers.15.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg426: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg427: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg428: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg429: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg430: tensor<3200xf32> {ttir.name = "model.layers.15.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg431: tensor<3200x8640xf32> {ttir.name = 
"model.layers.15.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg432: tensor<3200x8640xf32> {ttir.name = "model.layers.15.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg433: tensor<8640x3200xf32> {ttir.name = "model.layers.15.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg434: tensor<3200xf32> {ttir.name = "model.layers.16.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg435: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg436: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg437: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg438: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg439: tensor<3200xf32> {ttir.name = "model.layers.16.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg440: tensor<3200x8640xf32> {ttir.name = "model.layers.16.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg441: tensor<3200x8640xf32> {ttir.name = "model.layers.16.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg442: tensor<8640x3200xf32> {ttir.name = "model.layers.16.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg443: tensor<3200xf32> {ttir.name = "model.layers.17.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg444: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg445: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg446: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg447: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg448: tensor<3200xf32> {ttir.name = "model.layers.17.post_attention_layernorm.weight"} 
loc("LlamaForCausalLM":0:0), %arg449: tensor<3200x8640xf32> {ttir.name = "model.layers.17.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg450: tensor<3200x8640xf32> {ttir.name = "model.layers.17.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg451: tensor<8640x3200xf32> {ttir.name = "model.layers.17.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg452: tensor<3200xf32> {ttir.name = "model.layers.18.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg453: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg454: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg455: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg456: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg457: tensor<3200xf32> {ttir.name = "model.layers.18.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg458: tensor<3200x8640xf32> {ttir.name = "model.layers.18.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg459: tensor<3200x8640xf32> {ttir.name = "model.layers.18.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg460: tensor<8640x3200xf32> {ttir.name = "model.layers.18.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg461: tensor<3200xf32> {ttir.name = "model.layers.19.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg462: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg463: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg464: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg465: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg466: tensor<3200xf32> 
{ttir.name = "model.layers.19.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg467: tensor<3200x8640xf32> {ttir.name = "model.layers.19.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg468: tensor<3200x8640xf32> {ttir.name = "model.layers.19.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg469: tensor<8640x3200xf32> {ttir.name = "model.layers.19.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg470: tensor<3200xf32> {ttir.name = "model.layers.20.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg471: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg472: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg473: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg474: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg475: tensor<3200xf32> {ttir.name = "model.layers.20.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg476: tensor<3200x8640xf32> {ttir.name = "model.layers.20.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg477: tensor<3200x8640xf32> {ttir.name = "model.layers.20.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg478: tensor<8640x3200xf32> {ttir.name = "model.layers.20.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg479: tensor<3200xf32> {ttir.name = "model.layers.21.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg480: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg481: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg482: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg483: tensor<3200x3200xf32> {ttir.name = 
"model.layers.21.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg484: tensor<3200xf32> {ttir.name = "model.layers.21.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg485: tensor<3200x8640xf32> {ttir.name = "model.layers.21.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg486: tensor<3200x8640xf32> {ttir.name = "model.layers.21.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg487: tensor<8640x3200xf32> {ttir.name = "model.layers.21.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg488: tensor<3200xf32> {ttir.name = "model.layers.22.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg489: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg490: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg491: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg492: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg493: tensor<3200xf32> {ttir.name = "model.layers.22.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg494: tensor<3200x8640xf32> {ttir.name = "model.layers.22.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg495: tensor<3200x8640xf32> {ttir.name = "model.layers.22.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg496: tensor<8640x3200xf32> {ttir.name = "model.layers.22.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg497: tensor<3200xf32> {ttir.name = "model.layers.23.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg498: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg499: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg500: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.v_proj.weight"} 
loc("LlamaForCausalLM":0:0), %arg501: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg502: tensor<3200xf32> {ttir.name = "model.layers.23.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg503: tensor<3200x8640xf32> {ttir.name = "model.layers.23.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg504: tensor<3200x8640xf32> {ttir.name = "model.layers.23.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg505: tensor<8640x3200xf32> {ttir.name = "model.layers.23.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg506: tensor<3200xf32> {ttir.name = "model.layers.24.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg507: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg508: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg509: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg510: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg511: tensor<3200xf32> {ttir.name = "model.layers.24.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg512: tensor<3200x8640xf32> {ttir.name = "model.layers.24.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg513: tensor<3200x8640xf32> {ttir.name = "model.layers.24.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg514: tensor<8640x3200xf32> {ttir.name = "model.layers.24.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg515: tensor<3200xf32> {ttir.name = "model.layers.25.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg516: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg517: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg518: 
tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg519: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg520: tensor<3200xf32> {ttir.name = "model.layers.25.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg521: tensor<3200x8640xf32> {ttir.name = "model.layers.25.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg522: tensor<3200x8640xf32> {ttir.name = "model.layers.25.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg523: tensor<8640x3200xf32> {ttir.name = "model.layers.25.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg524: tensor<3200x32000xf32> {ttir.name = "lm_head.weight"} loc("LlamaForCausalLM":0:0)) -> (tensor<1x12x32000xf32> {ttir.name = "LlamaForCausalLM.output_matmul_2246"}) {
+    %0 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2091)
+    %1 = "ttir.embedding"(%arg0, %arg289, %0) <{operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12xi32>, tensor<32000x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2091)
+    %2 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2092)
+    %3 = "ttir.multiply"(%1, %1, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2092)
+    %4 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2093)
+    %5 = "ttir.mean"(%3, %4) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2093)
+    %6 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2094)
+    %7 = "ttir.add"(%5, %arg1, %6) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2094)
+    %8 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2095)
+    %9 = "ttir.sqrt"(%7, %8) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2095)
+    %10 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2096)
+    %11 = "ttir.reciprocal"(%9, %10) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2096)
+    %12 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2097)
+    %13 = "ttir.multiply"(%1, %11, %12) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2097)
+    %14 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2098)
+    %15 = "ttir.multiply"(%arg290, %13, %14) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2098)
+    %16 = tensor.empty() : tensor<12x3200xf32> loc(#loc2099)
+    %17 = "ttir.squeeze"(%15, %16) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2099)
+    %18 = tensor.empty() : tensor<12x3200xf32> loc(#loc2100)
+    %19 = "ttir.matmul"(%17, %arg291, %18) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2100)
+    %20 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2101)
+    %21 = "ttir.reshape"(%19, %20) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2101)
+    %22 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2102)
+    %23 = "ttir.transpose"(%21, %22) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2102)
+    %24 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2103)
+    %25 = "ttir.concat"(%arg2, %arg2, %24) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2103)
+    %26 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2104)
+    %27 = "ttir.sin"(%25, %26) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2104)
+    %28 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2105)
+    %29 = "ttir.unsqueeze"(%27, %28) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2105)
+    %30 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2106)
+    %31 = "ttir.multiply"(%23, %29, %30) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2106)
+    %32 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2107)
+    %33 = "ttir.transpose"(%23, %32) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2107)
+    %34 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2108)
+    %35 = "ttir.matmul"(%arg3, %33, %34) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2108)
+    %36 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2109)
+    %37 = "ttir.transpose"(%35, %36) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2109)
+    %38 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2110)
+    %39 = "ttir.multiply"(%37, %arg4, %38) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2110)
+    %40 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2111)
+    %41 = "ttir.transpose"(%23, %40) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2111)
+    %42 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2112)
+    %43 = "ttir.matmul"(%arg5, %41, %42) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2112)
+    %44 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2113)
+    %45 = "ttir.transpose"(%43, %44) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2113)
+    %46 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2114)
+    %47 = "ttir.concat"(%39, %45, %46) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2114)
+    %48 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2115)
+    %49 = "ttir.cos"(%25, %48) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2115)
+    %50 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2116)
+    %51 = "ttir.unsqueeze"(%49, %50) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2116)
+    %52 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2117)
+    %53 = "ttir.multiply"(%47, %51, %52) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2117)
+    %54 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2118)
+    %55 = "ttir.add"(%31, %53, %54) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2118)
+    %56 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2119)
+    %57 = "ttir.squeeze"(%55, %56) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2119)
+    %58 = tensor.empty() : tensor<12x3200xf32> loc(#loc2120)
+    %59 = "ttir.matmul"(%17, %arg292, %58) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2120)
+    %60 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2121)
+    %61 = "ttir.reshape"(%59, %60) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2121)
+    %62 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2122)
+    %63 = "ttir.transpose"(%61, %62) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2122)
+    %64 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2123)
+    %65 = "ttir.multiply"(%63, %29, %64) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2123)
+    %66 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2124)
+    %67 = "ttir.transpose"(%63, %66) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2124)
+    %68 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2125)
+    %69 = "ttir.matmul"(%arg6, %67, %68) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2125)
+    %70 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2126)
+    %71 = "ttir.transpose"(%69, %70) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2126)
+    %72 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2127)
+    %73 = "ttir.multiply"(%71, %arg7, %72) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2127)
+    %74 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2128)
+    %75 = "ttir.transpose"(%63, %74) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2128)
+    %76 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2129)
+    %77 = "ttir.matmul"(%arg8, %75, %76) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2129)
+    %78 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2130)
+    %79 = "ttir.transpose"(%77, %78) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2130)
+    %80 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2131)
+    %81 = "ttir.concat"(%73, %79, %80) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2131)
+    %82 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2132)
+    %83 = "ttir.multiply"(%81, %51, %82) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2132)
+    %84 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2133)
+    %85 = "ttir.add"(%65, %83, %84) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2133)
+    %86 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2134)
+    %87 = "ttir.squeeze"(%85, %86) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2134)
+    %88 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2135)
+    %89 = "ttir.transpose"(%87, %88) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2135)
+    %90 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2136)
+    %91 = "ttir.matmul"(%57, %89, %90) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2136)
+    %92 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2137)
+    %93 = "ttir.unsqueeze"(%91, %92) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2137)
+    %94 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2138)
+    %95 = "ttir.multiply"(%93, %arg9, %94) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2138)
+    %96 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2139)
+    %97 = "ttir.add"(%95, %arg10, %96) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2139)
+    %98 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2140)
+    %99 = "ttir.softmax"(%97, %98) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2140)
+    %100 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2141)
+    %101 = "ttir.squeeze"(%99, %100) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2141)
+    %102 = tensor.empty() : tensor<12x3200xf32> loc(#loc2142)
+    %103 = "ttir.matmul"(%17, %arg293, %102) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2142)
+    %104 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2143)
+    %105 = "ttir.reshape"(%103, %104) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2143)
+    %106 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2144)
+    %107 = "ttir.transpose"(%105, %106) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2144)
+    %108 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2145)
+    %109 = "ttir.transpose"(%107, %108) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2145)
+    %110 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2146)
+    %111 = "ttir.squeeze"(%109, %110) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2146)
+    %112 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2147)
+    %113 = "ttir.transpose"(%111, %112) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2147)
+    %114 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2148)
+    %115 = "ttir.matmul"(%101, %113, %114) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2148)
+    %116 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2149)
+    %117 = "ttir.unsqueeze"(%115, %116) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2149)
+    %118 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2150)
+    %119 = "ttir.transpose"(%117, %118) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2150)
+    %120 = tensor.empty() : tensor<12x3200xf32> loc(#loc2151)
+    %121 = "ttir.reshape"(%119, %120) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2151)
+    %122 = tensor.empty() : tensor<12x3200xf32> loc(#loc2152)
+    %123 = "ttir.matmul"(%121, %arg294, %122) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2152)
+    %124 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2153)
+    %125 = "ttir.unsqueeze"(%123, %124) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2153)
+    %126 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2154)
+    %127 = "ttir.add"(%1, %125, %126) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2154)
+    %128 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2155)
+    %129 = "ttir.multiply"(%127, %127, %128) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2155)
+    %130 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2156)
+    %131 = "ttir.mean"(%129, %130) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2156)
+    %132 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2157)
+    %133 = "ttir.add"(%131, %arg11, %132) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2157)
+    %134 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2158)
+    %135 = "ttir.sqrt"(%133, %134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2158)
+    %136 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2159)
+    %137 = "ttir.reciprocal"(%135, %136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2159)
+    %138 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2160)
+    %139 = "ttir.multiply"(%127, %137, %138) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2160)
+    %140 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2161)
+    %141 = "ttir.multiply"(%arg295, %139, %140) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2161)
+    %142 = tensor.empty() : tensor<12x3200xf32> loc(#loc2162)
+    %143 = "ttir.squeeze"(%141, %142) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2162)
+    %144 = tensor.empty() : tensor<12x8640xf32> loc(#loc2163)
+    %145 = "ttir.matmul"(%143, %arg296, %144) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2163)
+    %146 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2164)
+    %147 = "ttir.unsqueeze"(%145, %146) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2164)
+    %148 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2165)
+    %149 = "ttir.sigmoid"(%147, %148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2165)
+    %150 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2166)
+    %151 = "ttir.multiply"(%147, %149, %150) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2166)
+    %152 = tensor.empty() : tensor<12x8640xf32> loc(#loc2167)
+    %153 = "ttir.matmul"(%143, %arg297, %152) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2167)
+    %154 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2168)
+    %155 = "ttir.unsqueeze"(%153, %154) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2168)
+    %156 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2169)
+    %157 = "ttir.multiply"(%151, %155, %156) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2169)
+    %158 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2170)
+    %159 = "ttir.matmul"(%157, %arg298, %158) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2170)
+    %160 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2171)
+    %161 = "ttir.add"(%127, %159, %160) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2171)
+    %162 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2172)
+    %163 = "ttir.multiply"(%161, %161, %162) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2172)
+    %164 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2173)
+    %165 = "ttir.mean"(%163, %164) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2173)
+    %166 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2174)
+    %167 = "ttir.add"(%165, %arg12, %166) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2174)
+    %168 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2175)
+    %169 = "ttir.sqrt"(%167, %168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2175)
+    %170 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2176)
+    %171 = "ttir.reciprocal"(%169, %170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2176)
+    %172 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2177)
+    %173 = "ttir.multiply"(%161, %171, %172) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2177)
+    %174 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2178)
+    %175 = "ttir.multiply"(%arg299, %173, %174) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2178)
+    %176 = tensor.empty() : tensor<12x3200xf32> loc(#loc2179)
+    %177 = "ttir.squeeze"(%175, %176) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2179)
+    %178 = tensor.empty() : tensor<12x3200xf32> loc(#loc2180)
+    %179 = "ttir.matmul"(%177, %arg300, %178) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2180)
+    %180 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2181)
+    %181 = "ttir.reshape"(%179, %180) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2181)
+    %182 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2182)
+    %183 = "ttir.transpose"(%181, %182) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2182)
+    %184 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2183)
+    %185 = "ttir.concat"(%arg13, %arg13, %184) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2183)
+    %186 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2184)
+    %187 = "ttir.sin"(%185, %186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2184)
+    %188 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2185)
+    %189 = "ttir.unsqueeze"(%187, %188) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2185)
+    %190 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2186)
+    %191 = "ttir.multiply"(%183, %189, %190) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2186)
+    %192 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2187)
+    %193 = "ttir.transpose"(%183, %192) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2187)
+    %194 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2188)
+    %195 = "ttir.matmul"(%arg14, %193, %194) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2188)
+    %196 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2189)
+    %197 = "ttir.transpose"(%195, %196) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2189)
+    %198 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2190)
+    %199 = "ttir.multiply"(%197, %arg15, %198) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2190)
+    %200 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2191)
+    %201 = "ttir.transpose"(%183, %200) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2191)
+    %202 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2192)
+    %203 = "ttir.matmul"(%arg16, %201, %202) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2192)
+    %204 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2193)
+    %205 = "ttir.transpose"(%203, %204) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2193)
+    %206 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2194)
+    %207 = "ttir.concat"(%199, %205, %206) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2194)
+    %208 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2195)
+    %209 = "ttir.cos"(%185, %208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2195)
+    %210 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2196)
+    %211 = "ttir.unsqueeze"(%209, %210) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2196)
+    %212 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2197)
+    %213 = "ttir.multiply"(%207, %211, %212) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2197)
+    %214 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2198)
+    %215 = "ttir.add"(%191, %213, %214) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2198)
+    %216 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2199)
+    %217 = "ttir.squeeze"(%215, %216) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2199)
+    %218 = tensor.empty() : tensor<12x3200xf32> loc(#loc2200)
+    %219 = "ttir.matmul"(%177, %arg301, %218) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2200)
+    %220 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2201)
+    %221 = "ttir.reshape"(%219, %220) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2201)
+    %222 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2202)
+    %223 = "ttir.transpose"(%221, %222) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2202)
+    %224 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2203)
+    %225 = "ttir.multiply"(%223, %189, %224) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2203)
+    %226 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2204)
+    %227 = "ttir.transpose"(%223, %226) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2204)
+    %228 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2205)
+    %229 = "ttir.matmul"(%arg17, %227, %228) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2205)
+    %230 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2206)
+    %231 = "ttir.transpose"(%229, %230) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2206)
+    %232 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2207)
+    %233 = "ttir.multiply"(%231, %arg18, %232) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2207)
+    %234 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2208)
+    %235 = "ttir.transpose"(%223, %234) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2208)
+    %236 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2209)
+    %237 = "ttir.matmul"(%arg19, %235, %236) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2209)
+    %238 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2210)
+    %239 = "ttir.transpose"(%237, %238) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2210)
+    %240 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2211)
+    %241 = "ttir.concat"(%233, %239, %240) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2211)
+    %242 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2212)
+    %243 = "ttir.multiply"(%241, %211, %242) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2212)
+    %244 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2213)
+    %245 = "ttir.add"(%225, %243, %244) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2213)
+    %246 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2214)
+    %247 = "ttir.squeeze"(%245, %246) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2214)
+    %248 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2215)
+    %249 = "ttir.transpose"(%247, %248) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2215)
+    %250 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2216)
+    %251 = "ttir.matmul"(%217, %249, %250) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2216)
+    %252 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2217)
+    %253 = "ttir.unsqueeze"(%251, %252) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2217)
+    %254 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2218)
+    %255 = "ttir.multiply"(%253, %arg20, %254) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2218)
+    %256 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2219)
+    %257 = "ttir.add"(%255, %arg21, %256) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2219)
+    %258 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2220)
+    %259 = "ttir.softmax"(%257, %258) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2220)
+    %260 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2221)
+    %261 = "ttir.squeeze"(%259, %260) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2221)
+    %262 = tensor.empty() : tensor<12x3200xf32> loc(#loc2222)
+    %263 = "ttir.matmul"(%177, %arg302, %262) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2222)
+    %264 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2223)
+    %265 = "ttir.reshape"(%263, %264) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2223)
+    %266 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2224)
+    %267 = "ttir.transpose"(%265, %266) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2224)
+    %268 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2225)
+    %269 = "ttir.transpose"(%267, %268) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2225)
+    %270 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2226)
+    %271 = "ttir.squeeze"(%269, %270) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2226)
+    %272 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2227)
+    %273 = "ttir.transpose"(%271, %272) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2227)
+    %274 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2228)
+    %275 = "ttir.matmul"(%261, %273, %274) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2228)
+    %276 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2229)
+    %277 = "ttir.unsqueeze"(%275, %276) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2229)
+    %278 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2230)
+    %279 = "ttir.transpose"(%277, %278) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2230)
+    %280 = tensor.empty() : tensor<12x3200xf32> loc(#loc2231)
+    %281 = "ttir.reshape"(%279, %280) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2231)
+    %282 = tensor.empty() : tensor<12x3200xf32> loc(#loc2232)
+    %283 = "ttir.matmul"(%281, %arg303, %282) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2232)
+    %284 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2233)
+    %285 = "ttir.unsqueeze"(%283, %284) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2233)
+    %286 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2234)
+    %287 = "ttir.add"(%161, %285, %286) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2234)
+    %288 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2235)
+    %289 = "ttir.multiply"(%287, %287, %288) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2235)
+    %290 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2236)
+    %291 = "ttir.mean"(%289, %290) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2236)
+    %292 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2237)
+    %293 = "ttir.add"(%291, %arg22, %292) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2237)
+    %294 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2238)
+    %295 = "ttir.sqrt"(%293, %294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2238)
+    %296 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2239)
+    %297 = "ttir.reciprocal"(%295, %296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2239)
+    %298 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2240)
+    %299 = "ttir.multiply"(%287, %297, %298) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2240)
+    %300 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2241)
+    %301 = "ttir.multiply"(%arg304, %299, %300) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2241)
+    %302 = tensor.empty() : tensor<12x3200xf32> loc(#loc2242)
+    %303 = "ttir.squeeze"(%301, %302) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2242)
+    %304 = tensor.empty() : tensor<12x8640xf32> loc(#loc2243)
+    %305 = "ttir.matmul"(%303, %arg305, %304) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2243)
+    %306 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2244)
+    %307 = "ttir.unsqueeze"(%305, %306) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2244)
+    %308 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2245)
+    %309 = "ttir.sigmoid"(%307, %308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2245)
+    %310 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2246)
+    %311 = "ttir.multiply"(%307, %309, %310) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2246)
+    %312 = tensor.empty() : tensor<12x8640xf32> loc(#loc2247)
+    %313 = "ttir.matmul"(%303, %arg306, %312) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2247)
+    %314 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2248)
+    %315 = "ttir.unsqueeze"(%313, %314) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2248)
+    %316 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2249)
+    %317 = "ttir.multiply"(%311, %315, %316) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2249)
+    %318 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2250)
+    %319 = "ttir.matmul"(%317, %arg307, %318) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2250)
+    %320 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2251)
+    %321 = "ttir.add"(%287, %319, %320) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2251)
+    %322 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2252)
+    %323 = "ttir.multiply"(%321, %321, %322) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2252)
+    %324 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2253)
+    %325 = "ttir.mean"(%323, %324) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2253)
+    %326 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2254)
+    %327 = "ttir.add"(%325, %arg23, %326) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2254)
+    %328 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2255)
+    %329 = "ttir.sqrt"(%327, %328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2255)
+    %330 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2256)
+    %331 = "ttir.reciprocal"(%329, %330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2256)
+    %332 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2257)
+    %333 = "ttir.multiply"(%321, %331, %332) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2257)
+    %334 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2258)
+    %335 = "ttir.multiply"(%arg308, %333, %334) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2258)
+    %336 = tensor.empty() : tensor<12x3200xf32> loc(#loc2259)
+    %337 = "ttir.squeeze"(%335, %336) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2259)
+    %338 = tensor.empty() : tensor<12x3200xf32> loc(#loc2260)
+    %339 = "ttir.matmul"(%337, %arg309, %338) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2260)
+    %340 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2261)
+    %341 = "ttir.reshape"(%339, %340) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2261)
+    %342 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2262)
+    %343 = "ttir.transpose"(%341, %342) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2262)
+    %344 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2263)
+    %345 = "ttir.concat"(%arg24, %arg24, %344) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2263)
+    %346 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2264)
+    %347 = "ttir.sin"(%345, %346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2264)
+    %348 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2265)
+    %349 = "ttir.unsqueeze"(%347, %348) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2265)
+    %350 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2266)
+    %351 = "ttir.multiply"(%343, %349, %350) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2266)
+    %352 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2267)
+    %353 = "ttir.transpose"(%343, %352) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2267)
+    %354 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2268)
+    %355 = "ttir.matmul"(%arg25, %353, %354) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2268)
+    %356 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2269)
+    %357 = "ttir.transpose"(%355, %356) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2269)
+    %358 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2270)
+    %359 = "ttir.multiply"(%357, %arg26, %358) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2270)
+    %360 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2271)
+    %361 = "ttir.transpose"(%343, %360) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2271)
+    %362 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2272)
+    %363 = "ttir.matmul"(%arg27, %361, %362) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2272)
+    %364 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2273)
+    %365 = "ttir.transpose"(%363, %364) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2273)
+    %366 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2274)
+    %367 = "ttir.concat"(%359, %365, %366) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2274)
+    %368 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2275)
+    %369 = "ttir.cos"(%345, %368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2275)
+    %370 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2276)
+    %371 = "ttir.unsqueeze"(%369, %370) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2276)
+    %372 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2277)
+    %373 = "ttir.multiply"(%367, %371, %372) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2277)
+    %374 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2278)
+    %375 = "ttir.add"(%351, %373, %374) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2278)
+    %376 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2279)
+    %377 = "ttir.squeeze"(%375, %376) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2279)
+    %378 = tensor.empty() : tensor<12x3200xf32> loc(#loc2280)
+    %379 = "ttir.matmul"(%337, %arg310, %378) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2280)
+    %380 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2281)
+    %381 = "ttir.reshape"(%379, %380) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2281)
+    %382 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2282)
+    %383 = "ttir.transpose"(%381, %382) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2282)
+    %384 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2283)
+    %385 = "ttir.multiply"(%383, %349, %384) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2283)
+    %386 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2284)
+    %387 = "ttir.transpose"(%383, %386) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2284)
+    %388 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2285)
+    %389 = "ttir.matmul"(%arg28, %387, %388) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2285)
+    %390 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2286)
+    %391 = "ttir.transpose"(%389, %390) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2286)
+    %392 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2287)
+    %393 = "ttir.multiply"(%391, %arg29, %392) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2287)
+    %394 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2288)
+    %395 = "ttir.transpose"(%383, %394) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2288)
+    %396 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2289)
+    %397 = "ttir.matmul"(%arg30, %395, %396) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2289)
+    %398 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2290)
+    %399 = "ttir.transpose"(%397, %398) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2290)
+    %400 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2291)
+    %401 = "ttir.concat"(%393, %399, %400) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2291)
+    %402 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2292)
+    %403 = "ttir.multiply"(%401, %371, %402) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2292)
+    %404 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2293)
+    %405 = "ttir.add"(%385, %403, %404) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2293)
+    %406 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2294)
+    %407 = "ttir.squeeze"(%405, %406) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2294)
+    %408 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2295)
+    %409 = "ttir.transpose"(%407, %408) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2295)
+    %410 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2296)
+    %411 = "ttir.matmul"(%377, %409, %410) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2296)
+    %412 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2297)
+    %413 = "ttir.unsqueeze"(%411, %412) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2297)
+    %414 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2298)
+    %415 = "ttir.multiply"(%413, %arg31, %414) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2298)
+    %416 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2299)
+    %417 = "ttir.add"(%415, %arg32, %416) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2299)
+    %418 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2300)
+    %419 = "ttir.softmax"(%417, %418) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2300)
+    %420 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2301)
+    %421 = "ttir.squeeze"(%419, %420) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2301)
+    %422 = tensor.empty() : tensor<12x3200xf32> loc(#loc2302)
+    %423 = "ttir.matmul"(%337, %arg311, %422) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2302)
+    %424 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2303)
+    %425 = "ttir.reshape"(%423, %424) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2303)
+    %426 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2304)
+    %427 = "ttir.transpose"(%425, %426) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2304)
+    %428 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2305)
+    %429 = "ttir.transpose"(%427, %428) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2305)
+    %430 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2306)
+    %431 = "ttir.squeeze"(%429, %430) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2306)
+    %432 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2307)
+    %433 = "ttir.transpose"(%431, %432) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2307)
+    %434 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2308)
+    %435 = "ttir.matmul"(%421, %433, %434) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2308)
+    %436 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2309)
+    %437 = "ttir.unsqueeze"(%435, %436) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2309)
+    %438 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2310)
+    %439 = "ttir.transpose"(%437, %438) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2310)
+    %440 = tensor.empty() : tensor<12x3200xf32> loc(#loc2311)
+    %441 = "ttir.reshape"(%439, %440) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2311)
+    %442 = tensor.empty() : tensor<12x3200xf32> loc(#loc2312)
+    %443 = "ttir.matmul"(%441, %arg312, %442) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2312)
+    %444 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2313)
+    %445 = "ttir.unsqueeze"(%443, %444) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2313)
+    %446 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2314)
+    %447 = "ttir.add"(%321, %445, %446) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2314)
+    %448 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2315)
+    %449 = "ttir.multiply"(%447, %447, %448) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2315)
+    %450 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2316)
+    %451 = "ttir.mean"(%449, %450) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2316)
+    %452 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2317)
+    %453 = "ttir.add"(%451, %arg33, %452) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2317)
+    %454 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2318)
+    %455 = "ttir.sqrt"(%453, %454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2318)
+    %456 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2319)
+    %457 = "ttir.reciprocal"(%455, %456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2319)
+    %458 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2320)
+    %459 = "ttir.multiply"(%447, %457, %458) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2320)
+    %460 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2321)
+    %461 = "ttir.multiply"(%arg313, %459, %460) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2321)
+    %462 = tensor.empty() : tensor<12x3200xf32> loc(#loc2322)
+    %463 = "ttir.squeeze"(%461, %462) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2322)
+    %464 = tensor.empty() : tensor<12x8640xf32> loc(#loc2323)
+    %465 = "ttir.matmul"(%463, %arg314, %464) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2323)
+    %466 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2324)
+    %467 = "ttir.unsqueeze"(%465, %466) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2324)
+    %468 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2325)
+    %469 = "ttir.sigmoid"(%467, %468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2325)
+    %470 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2326)
+    %471 = "ttir.multiply"(%467, %469, %470) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2326)
+    %472 = tensor.empty() : tensor<12x8640xf32> loc(#loc2327)
+    %473 = "ttir.matmul"(%463, %arg315, %472) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2327)
+    %474 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2328)
+    %475 = "ttir.unsqueeze"(%473, %474) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2328)
+    %476 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2329)
+    %477 = "ttir.multiply"(%471, %475, %476) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2329)
+    %478 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2330)
+    %479 = "ttir.matmul"(%477, %arg316, %478) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2330)
+    %480 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2331)
+    %481 = "ttir.add"(%447, %479, %480) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2331)
+    %482 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2332)
+    %483 = "ttir.multiply"(%481, %481, %482) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2332)
+    %484 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2333)
+    %485 = "ttir.mean"(%483, %484) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2333)
+    %486 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2334)
+    %487 = "ttir.add"(%485, %arg34, %486) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2334)
+    %488 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2335)
+    %489 = "ttir.sqrt"(%487, %488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2335)
+    %490 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2336)
+    %491 = "ttir.reciprocal"(%489, %490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2336)
+    %492 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2337)
+    %493 = "ttir.multiply"(%481, %491, %492) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2337)
+    %494 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2338)
+    %495 = "ttir.multiply"(%arg317, %493, %494) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2338)
+    %496 = tensor.empty() : tensor<12x3200xf32> loc(#loc2339)
+    %497 = "ttir.squeeze"(%495, %496) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2339)
+    %498 = tensor.empty() : tensor<12x3200xf32> loc(#loc2340)
+    %499 = "ttir.matmul"(%497, %arg318, %498) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2340)
+    %500 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2341)
+    %501 = "ttir.reshape"(%499, %500) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2341)
+    %502 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2342)
+    %503 = "ttir.transpose"(%501, %502) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2342)
+    %504 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2343)
+    %505 = "ttir.concat"(%arg35, %arg35, %504) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2343)
+    %506 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2344)
+    %507 = "ttir.sin"(%505, %506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2344)
+    %508 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2345)
+    %509 = "ttir.unsqueeze"(%507, %508) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2345)
+    %510 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2346)
+    %511 = "ttir.multiply"(%503, %509, %510) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2346)
+    %512 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2347)
+    %513 = "ttir.transpose"(%503, %512) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2347)
+    %514 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2348)
+    %515 = "ttir.matmul"(%arg36, %513, %514) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2348)
+    %516 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2349)
+    %517 = "ttir.transpose"(%515, %516) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2349)
+    %518 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2350)
+    %519 = "ttir.multiply"(%517, %arg37, %518) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2350)
+    %520 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2351)
+    %521 = "ttir.transpose"(%503, %520) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2351)
+    %522 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2352)
+    %523 = "ttir.matmul"(%arg38, %521, %522) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2352)
+    %524 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2353)
+    %525 = "ttir.transpose"(%523, %524) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2353)
+    %526 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2354)
+    %527 = "ttir.concat"(%519, %525, %526) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2354)
+    %528 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2355)
+    %529 = "ttir.cos"(%505, %528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2355)
+    %530 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2356)
+    %531 = "ttir.unsqueeze"(%529, %530) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2356)
+    %532 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2357)
+    %533 = "ttir.multiply"(%527, %531, %532) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2357)
+    %534 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2358)
+    %535 = "ttir.add"(%511, %533, %534) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2358)
+    %536 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2359)
+    %537 = "ttir.squeeze"(%535, %536) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2359)
+    %538 = tensor.empty() : tensor<12x3200xf32> loc(#loc2360)
+    %539 = "ttir.matmul"(%497, %arg319, %538) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2360)
+    %540 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2361)
+    %541 = "ttir.reshape"(%539, %540) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2361)
+    %542 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2362)
+    %543 = "ttir.transpose"(%541, %542) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2362)
+    %544 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2363)
+    %545 = "ttir.multiply"(%543, %509, %544) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2363)
+    %546 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2364)
+    %547 = "ttir.transpose"(%543, %546) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2364)
+    %548 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2365)
+    %549 = "ttir.matmul"(%arg39, %547, %548) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2365)
+    %550 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2366)
+    %551 = "ttir.transpose"(%549, %550) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2366)
+    %552 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2367)
+    %553 = "ttir.multiply"(%551, %arg40, %552) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2367)
+    %554 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2368)
+    %555 = "ttir.transpose"(%543, %554) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2368)
+    %556 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2369)
+    %557 = "ttir.matmul"(%arg41, %555, %556) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2369)
+    %558 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2370)
+    %559 = "ttir.transpose"(%557, %558) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2370)
+    %560 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2371)
+    %561 = "ttir.concat"(%553, %559, %560) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2371)
+    %562 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2372)
+    %563 = "ttir.multiply"(%561, %531, %562) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2372)
+    %564 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2373)
+    %565 = "ttir.add"(%545, %563, %564) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2373)
+    %566 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2374)
+    %567 = "ttir.squeeze"(%565, %566) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2374)
+    %568 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2375)
+    %569 = "ttir.transpose"(%567, %568) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2375)
+    %570 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2376)
+    %571 = "ttir.matmul"(%537, %569, %570) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2376)
+    %572 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2377)
+    %573 = "ttir.unsqueeze"(%571, %572) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2377)
+    %574 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2378)
+    %575 = "ttir.multiply"(%573, %arg42, %574) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2378)
+    %576 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2379)
+    %577 = "ttir.add"(%575, %arg43, %576) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2379)
+    %578 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2380)
+    %579 = "ttir.softmax"(%577, %578) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2380)
+    %580 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2381)
+    %581 = "ttir.squeeze"(%579, %580) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2381)
+    %582 = tensor.empty() : tensor<12x3200xf32> loc(#loc2382)
+    %583 = "ttir.matmul"(%497, %arg320, %582) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2382)
+    %584 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2383)
+    %585 = "ttir.reshape"(%583, %584) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2383)
+    %586 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2384)
+    %587 = "ttir.transpose"(%585, %586) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2384)
+    %588 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2385)
+    %589 = "ttir.transpose"(%587, %588) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2385)
+    %590 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2386)
+    %591 = "ttir.squeeze"(%589, %590) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2386)
+    %592 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2387)
+    %593 = "ttir.transpose"(%591, %592) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2387)
+    %594 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2388)
+    %595 = "ttir.matmul"(%581, %593, %594) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2388)
+    %596 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2389)
+    %597 = "ttir.unsqueeze"(%595, %596) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2389)
+    %598 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2390)
+    %599 = "ttir.transpose"(%597, %598) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2390)
+    %600 = tensor.empty() : tensor<12x3200xf32> loc(#loc2391)
+    %601 = "ttir.reshape"(%599, %600) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2391)
+    %602 = tensor.empty() : tensor<12x3200xf32> loc(#loc2392)
+    %603 = "ttir.matmul"(%601, %arg321, %602) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2392)
+    %604 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2393)
+    %605 = "ttir.unsqueeze"(%603, %604) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2393)
+    %606 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2394)
+    %607 = "ttir.add"(%481, %605, %606) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2394)
+    %608 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2395)
+    %609 = "ttir.multiply"(%607, %607, %608) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2395)
+    %610 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2396)
+    %611 = "ttir.mean"(%609, %610) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2396)
+    %612 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2397)
+    %613 = "ttir.add"(%611, %arg44, %612) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2397)
+    %614 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2398)
+    %615 = "ttir.sqrt"(%613, %614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2398)
+    %616 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2399)
+    %617 = "ttir.reciprocal"(%615, %616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2399)
+    %618 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2400)
+    %619 = "ttir.multiply"(%607, %617, %618) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2400)
+    %620 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2401)
+    %621 = "ttir.multiply"(%arg322, %619, %620) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2401)
+    %622 = tensor.empty() : tensor<12x3200xf32> loc(#loc2402)
+    %623 = "ttir.squeeze"(%621, %622) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2402)
+    %624 = tensor.empty() : tensor<12x8640xf32> loc(#loc2403)
+    %625 = "ttir.matmul"(%623, %arg323, %624) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2403)
+    %626 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2404)
+    %627 = "ttir.unsqueeze"(%625, %626) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2404)
+    %628 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2405)
+    %629 = "ttir.sigmoid"(%627, %628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2405)
+    %630 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2406)
+    %631 = "ttir.multiply"(%627, %629, %630) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2406)
+    %632 = tensor.empty() : tensor<12x8640xf32> loc(#loc2407)
+    %633 = "ttir.matmul"(%623, %arg324, %632) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2407)
+    %634 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2408)
+    %635 = "ttir.unsqueeze"(%633, %634) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2408)
+    %636 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2409)
+    %637 = "ttir.multiply"(%631, %635, %636) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2409)
+    %638 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2410)
+    %639 = "ttir.matmul"(%637, %arg325, %638) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2410)
+    %640 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2411)
+    %641 = "ttir.add"(%607, %639, %640) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2411)
+    %642 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2412)
+    %643 = "ttir.multiply"(%641, %641, %642) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2412)
+    %644 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2413)
+    %645 = "ttir.mean"(%643, %644) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2413)
+    %646 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2414)
+    %647 = "ttir.add"(%645, %arg45, %646) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2414)
+    %648 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2415)
+    %649 = "ttir.sqrt"(%647, %648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2415)
+    %650 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2416)
+    %651 = "ttir.reciprocal"(%649, %650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2416)
+    %652 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2417)
+    %653 = "ttir.multiply"(%641, %651, %652) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2417)
+    %654 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2418)
+    %655 = "ttir.multiply"(%arg326, %653, %654) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2418)
+    %656 = tensor.empty() : tensor<12x3200xf32> loc(#loc2419)
+    %657 = "ttir.squeeze"(%655, %656) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2419)
+    %658 = tensor.empty() : tensor<12x3200xf32> loc(#loc2420)
+    %659 = "ttir.matmul"(%657, %arg327, %658) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2420)
+    %660 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2421)
+    %661 = "ttir.reshape"(%659, %660) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2421)
+    %662 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2422)
+    %663 = "ttir.transpose"(%661, %662) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2422)
+    %664 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2423)
+    %665 = "ttir.concat"(%arg46, %arg46, %664) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2423)
+    %666 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2424)
+    %667 = "ttir.sin"(%665, %666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2424)
+    %668 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2425)
+    %669 = "ttir.unsqueeze"(%667, %668) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2425)
+    %670 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2426)
+    %671 = "ttir.multiply"(%663, %669, %670) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2426)
+    %672 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2427)
+    %673 = "ttir.transpose"(%663, %672) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2427)
+    %674 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2428)
+    %675 = "ttir.matmul"(%arg47, %673, %674) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2428)
+    %676 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2429)
+    %677 = "ttir.transpose"(%675, %676) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2429)
+    %678 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2430)
+    %679 = "ttir.multiply"(%677, %arg48, %678) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2430)
+    %680 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2431)
+    %681 = "ttir.transpose"(%663, %680) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2431)
+    %682 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2432)
+    %683 = "ttir.matmul"(%arg49, %681, %682) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2432)
+    %684 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2433)
+    %685 = "ttir.transpose"(%683, %684) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2433)
+    %686 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2434)
+    %687 = "ttir.concat"(%679, %685, %686) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2434)
+    %688 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2435)
+    %689 = "ttir.cos"(%665, %688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2435)
+    %690 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2436)
+    %691 = "ttir.unsqueeze"(%689, %690) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2436)
+    %692 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2437)
+    %693 = "ttir.multiply"(%687, %691, %692) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2437)
+    %694 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2438)
+    %695 = "ttir.add"(%671, %693, %694) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2438)
+    %696 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2439)
+    %697 = "ttir.squeeze"(%695, %696) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2439)
+    %698 = tensor.empty() : tensor<12x3200xf32> loc(#loc2440)
+    %699 = "ttir.matmul"(%657, %arg328, %698) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2440)
+    %700 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2441)
+    %701 = "ttir.reshape"(%699, %700) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2441)
+    %702 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2442)
+    %703 = "ttir.transpose"(%701, %702) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2442)
+    %704 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2443)
+    %705 = "ttir.multiply"(%703, %669, %704) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2443)
+    %706 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2444)
+    %707 = "ttir.transpose"(%703, %706) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2444)
+    %708 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2445)
+    %709 = "ttir.matmul"(%arg50, %707, %708) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2445)
+    %710 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2446)
+    %711 = "ttir.transpose"(%709, %710) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2446)
+    %712 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2447)
+    %713 = "ttir.multiply"(%711, %arg51, %712) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2447)
+    %714 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2448)
+    %715 = "ttir.transpose"(%703, %714) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2448)
+    %716 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2449)
+    %717 = "ttir.matmul"(%arg52, %715, %716) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2449)
+    %718 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2450)
+    %719 = "ttir.transpose"(%717, %718) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2450)
+    %720 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2451)
+    %721 = "ttir.concat"(%713, %719, %720) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2451)
+    %722 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2452)
+    %723 = "ttir.multiply"(%721, %691, %722) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2452)
+    %724 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2453)
+    %725 = "ttir.add"(%705, %723, %724) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2453)
+    %726 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2454)
+    %727 = "ttir.squeeze"(%725, %726) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2454)
+    %728 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2455)
+    %729 = "ttir.transpose"(%727, %728) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2455)
+    %730 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2456)
+    %731 = "ttir.matmul"(%697, %729, %730) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2456)
+    %732 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2457)
+    %733 = "ttir.unsqueeze"(%731, %732) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2457)
+    %734 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2458)
+    %735 = "ttir.multiply"(%733, %arg53, %734) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2458)
+    %736 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2459)
+    %737 = "ttir.add"(%735, %arg54, %736) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2459)
+    %738 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2460)
+    %739 = "ttir.softmax"(%737, %738) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2460)
+    %740 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2461)
+    %741 = "ttir.squeeze"(%739, %740) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2461)
+    %742 = tensor.empty() : tensor<12x3200xf32> loc(#loc2462)
+    %743 = "ttir.matmul"(%657, %arg329, %742) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2462)
+    %744 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2463)
+    %745 = "ttir.reshape"(%743, %744) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2463)
+    %746 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2464)
+    %747 = "ttir.transpose"(%745, %746) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2464)
+    %748 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2465)
+    %749 = "ttir.transpose"(%747, %748) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2465)
+    %750 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2466)
+    %751 = "ttir.squeeze"(%749, %750) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2466)
+    %752 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2467)
+    %753 = "ttir.transpose"(%751, %752) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2467)
+    %754 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2468)
+    %755 = "ttir.matmul"(%741, %753, %754) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2468)
+    %756 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2469)
+    %757 = "ttir.unsqueeze"(%755, %756) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2469)
+    %758 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2470)
+    %759 = "ttir.transpose"(%757, %758) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2470)
+    %760 = tensor.empty() : tensor<12x3200xf32> loc(#loc2471)
+    %761 = "ttir.reshape"(%759, %760) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2471)
+    %762 = tensor.empty() : tensor<12x3200xf32> loc(#loc2472)
+    %763 = "ttir.matmul"(%761, %arg330, %762) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2472)
+    %764 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2473)
+    %765 = "ttir.unsqueeze"(%763, %764) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2473)
+    %766 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2474)
+    %767 = "ttir.add"(%641, %765, %766) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2474)
+    %768 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2475)
+    %769 = "ttir.multiply"(%767, %767, %768) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2475)
+    %770 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2476)
+    %771 = "ttir.mean"(%769, %770) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2476)
+    %772 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2477)
+    %773 = "ttir.add"(%771, %arg55, %772) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2477)
+    %774 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2478)
+    %775 = "ttir.sqrt"(%773, %774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2478)
+    %776 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2479)
+    %777 = "ttir.reciprocal"(%775, %776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2479)
+    %778 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2480)
+    %779 = "ttir.multiply"(%767, %777, %778) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2480)
+    %780 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2481)
+    %781 = "ttir.multiply"(%arg331, %779, %780) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2481)
+    %782 = tensor.empty() : tensor<12x3200xf32> loc(#loc2482)
+    %783 = "ttir.squeeze"(%781, %782) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2482)
+    %784 = tensor.empty() : tensor<12x8640xf32> loc(#loc2483)
+    %785 = "ttir.matmul"(%783, %arg332, %784) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2483)
+    %786 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2484)
+    %787 = "ttir.unsqueeze"(%785, %786) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2484)
+    %788 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2485)
+    %789 = "ttir.sigmoid"(%787, %788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2485)
+    %790 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2486)
+    %791 = "ttir.multiply"(%787, %789, %790) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2486)
+    %792 = tensor.empty() : tensor<12x8640xf32> loc(#loc2487)
+    %793 = "ttir.matmul"(%783, %arg333, %792) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2487)
+    %794 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2488)
+    %795 = "ttir.unsqueeze"(%793, %794) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2488)
+    %796 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2489)
+    %797 = "ttir.multiply"(%791, %795, %796) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2489)
+    %798 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2490)
+    %799 = "ttir.matmul"(%797, %arg334, %798) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2490)
+    %800 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2491)
+    %801 = "ttir.add"(%767, %799, %800) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2491)
+    %802 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2492)
+    %803 = "ttir.multiply"(%801, %801, %802) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2492)
+    %804 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2493)
+    %805 = "ttir.mean"(%803, %804) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2493)
+    %806 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2494)
+    %807 = "ttir.add"(%805, %arg56, %806) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2494)
+    %808 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2495)
+    %809 = "ttir.sqrt"(%807, %808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2495)
+    %810 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2496)
+    %811 = "ttir.reciprocal"(%809, %810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2496)
+    %812 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2497)
+    %813 = "ttir.multiply"(%801, %811, %812) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2497)
+    %814 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2498)
+    %815 = "ttir.multiply"(%arg335, %813, %814) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2498)
+    %816 = tensor.empty() : tensor<12x3200xf32> loc(#loc2499)
+    %817 = "ttir.squeeze"(%815, %816) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2499)
+    %818 = tensor.empty() : tensor<12x3200xf32> loc(#loc2500)
+    %819 = "ttir.matmul"(%817, %arg336, %818) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2500)
+    %820 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2501)
+    %821 = "ttir.reshape"(%819, %820) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2501)
+    %822 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2502)
+    %823 = "ttir.transpose"(%821, %822) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2502)
+    %824 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2503)
+    %825 = "ttir.concat"(%arg57, %arg57, %824) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2503)
+    %826 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2504)
+    %827 = "ttir.sin"(%825, %826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2504)
+    %828 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2505)
+    %829 = "ttir.unsqueeze"(%827, %828) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2505)
+    %830 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2506)
+    %831 = "ttir.multiply"(%823, %829, %830) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2506)
+    %832 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2507)
+    %833 = "ttir.transpose"(%823, %832) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2507)
+    %834 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2508)
+    %835 = "ttir.matmul"(%arg58, %833, %834) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2508)
+    %836 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2509)
+    %837 = "ttir.transpose"(%835, %836) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2509)
+    %838 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2510)
+    %839 = "ttir.multiply"(%837, %arg59, %838) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2510)
+    %840 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2511)
+    %841 = "ttir.transpose"(%823, %840) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2511)
+    %842 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2512)
+    %843 = "ttir.matmul"(%arg60, %841, %842) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2512)
+    %844 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2513)
+    %845 = "ttir.transpose"(%843, %844) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2513)
+    %846 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2514)
+    %847 = "ttir.concat"(%839, %845, %846) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2514)
+    %848 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2515)
+    %849 = "ttir.cos"(%825, %848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2515)
+    %850 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2516)
+    %851 = "ttir.unsqueeze"(%849, %850) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2516)
+    %852 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2517)
+    %853 = "ttir.multiply"(%847, %851, %852) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2517)
+    %854 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2518)
+    %855 = "ttir.add"(%831, %853, %854) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2518)
+    %856 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2519)
+    %857 = "ttir.squeeze"(%855, %856) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2519)
+    %858 = tensor.empty() : tensor<12x3200xf32> loc(#loc2520)
+    %859 = "ttir.matmul"(%817, %arg337, %858) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2520)
+    %860 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2521)
+    %861 = "ttir.reshape"(%859, %860) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2521)
+    %862 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2522)
+    %863 = "ttir.transpose"(%861, %862) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2522)
+    %864 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2523)
+    %865 = "ttir.multiply"(%863, %829, %864) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2523)
+    %866 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2524)
+    %867 = "ttir.transpose"(%863, %866) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2524)
+    %868 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2525)
+    %869 = "ttir.matmul"(%arg61, %867, %868) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2525)
+    %870 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2526)
+    %871 = "ttir.transpose"(%869, %870) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2526)
+    %872 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2527)
+    %873 = "ttir.multiply"(%871, %arg62, %872) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2527)
+    %874 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2528)
+    %875 = "ttir.transpose"(%863, %874) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2528)
+    %876 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2529)
+    %877 = "ttir.matmul"(%arg63, %875, %876) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2529)
+    %878 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2530)
+    %879 = "ttir.transpose"(%877, %878) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2530)
+    %880 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2531)
+    %881 = "ttir.concat"(%873, %879, %880) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2531)
+    %882 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2532)
+    %883 = "ttir.multiply"(%881, %851, %882) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2532)
+    %884 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2533)
+    %885 = "ttir.add"(%865, %883, %884) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2533)
+    %886 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2534)
+    %887 = "ttir.squeeze"(%885, %886) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2534)
+    %888 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2535)
+    %889 = "ttir.transpose"(%887, %888) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2535)
+    %890 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2536)
+    %891 = "ttir.matmul"(%857, %889, %890) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2536)
+    %892 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2537)
+    %893 = "ttir.unsqueeze"(%891, %892) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2537)
+    %894 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2538)
+    %895 = "ttir.multiply"(%893, %arg64, %894) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2538)
+    %896 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2539)
+    %897 = "ttir.add"(%895, %arg65, %896) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2539)
+    %898 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2540)
+    %899 = "ttir.softmax"(%897, %898) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2540)
+    %900 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2541)
+    %901 = "ttir.squeeze"(%899, %900) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2541)
+    %902 = tensor.empty() : tensor<12x3200xf32> loc(#loc2542)
+    %903 = "ttir.matmul"(%817, %arg338, %902) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2542)
+    %904 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2543)
+    %905 = "ttir.reshape"(%903, %904) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2543)
+    %906 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2544)
+    %907 = "ttir.transpose"(%905, %906) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2544)
+    %908 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2545)
+    %909 = "ttir.transpose"(%907, %908) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2545)
+    %910 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2546)
+    %911 = "ttir.squeeze"(%909, %910) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2546)
+    %912 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2547)
+    %913 = "ttir.transpose"(%911, %912) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2547)
+    %914 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2548)
+    %915 = "ttir.matmul"(%901, %913, %914) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2548)
+    %916 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2549)
+    %917 = "ttir.unsqueeze"(%915, %916) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2549)
+    %918 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2550)
+    %919 = "ttir.transpose"(%917, %918) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2550)
+    %920 = tensor.empty() : tensor<12x3200xf32> loc(#loc2551)
+    %921 = "ttir.reshape"(%919, %920) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2551)
+    %922 = tensor.empty() : tensor<12x3200xf32> loc(#loc2552)
+    %923 = "ttir.matmul"(%921, %arg339, %922) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2552)
+    %924 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2553)
+    %925 = "ttir.unsqueeze"(%923, %924) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2553)
+    %926 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2554)
+    %927 = "ttir.add"(%801, %925, %926) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2554)
+    %928 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2555)
+    %929 = "ttir.multiply"(%927, %927, %928) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2555)
+    %930 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2556)
+    %931 = "ttir.mean"(%929, %930) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2556)
+    %932 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2557)
+    %933 = "ttir.add"(%931, %arg66, %932) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2557)
+    %934 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2558)
+    %935 = "ttir.sqrt"(%933, %934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2558)
+    %936 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2559)
+    %937 = "ttir.reciprocal"(%935, %936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2559)
+    %938 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2560)
+    %939 = "ttir.multiply"(%927, %937, %938) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2560)
+    %940 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2561)
+    %941 = "ttir.multiply"(%arg340, %939, %940) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2561)
+    %942 = tensor.empty() : tensor<12x3200xf32> loc(#loc2562)
+    %943 = "ttir.squeeze"(%941, %942) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2562)
+    %944 = tensor.empty() : tensor<12x8640xf32> loc(#loc2563)
+    %945 = "ttir.matmul"(%943, %arg341, %944) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2563)
+    %946 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2564)
+    %947 = "ttir.unsqueeze"(%945, %946) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2564)
+    %948 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2565)
+    %949 = "ttir.sigmoid"(%947, %948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2565)
+    %950 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2566)
+    %951 = "ttir.multiply"(%947, %949, %950) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2566)
+    %952 = tensor.empty() : tensor<12x8640xf32> loc(#loc2567)
+    %953 = "ttir.matmul"(%943, %arg342, %952) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2567)
+    %954 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2568)
+    %955 = "ttir.unsqueeze"(%953, %954) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2568)
+    %956 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2569)
+    %957 = "ttir.multiply"(%951, %955, %956) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2569)
+    %958 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2570)
+    %959 = "ttir.matmul"(%957, %arg343, %958) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2570)
+    %960 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2571)
+    %961 = "ttir.add"(%927, %959, %960) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2571)
+    %962 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2572)
+    %963 = "ttir.multiply"(%961, %961, %962) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2572)
+    %964 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2573)
+    %965 = "ttir.mean"(%963, %964) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2573)
+    %966 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2574)
+    %967 = "ttir.add"(%965, %arg67, %966) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2574)
+    %968 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2575)
+    %969 = "ttir.sqrt"(%967, %968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2575)
+    %970 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2576)
+    %971 = "ttir.reciprocal"(%969, %970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2576)
+    %972 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2577)
+    %973 = "ttir.multiply"(%961, %971, %972) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2577)
+    %974 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2578)
+    %975 = "ttir.multiply"(%arg344, %973, %974) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2578)
+    %976 = tensor.empty() : tensor<12x3200xf32> loc(#loc2579)
+    %977 = "ttir.squeeze"(%975, %976) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2579)
+    %978 = tensor.empty() : tensor<12x3200xf32> loc(#loc2580)
+    %979 = "ttir.matmul"(%977, %arg345, %978) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2580)
+    %980 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2581)
+    %981 = "ttir.reshape"(%979, %980) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2581)
+    %982 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2582)
+    %983 = "ttir.transpose"(%981, %982) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2582)
+    %984 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2583)
+    %985 = "ttir.concat"(%arg68, %arg68, %984) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2583)
+    %986 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2584)
+    %987 = "ttir.sin"(%985, %986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2584)
+    %988 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2585)
+    %989 = "ttir.unsqueeze"(%987, %988) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2585)
+    %990 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2586)
+    %991 = "ttir.multiply"(%983, %989, %990) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2586)
+    %992 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2587)
+    %993 = "ttir.transpose"(%983, %992) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2587)
+    %994 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2588)
+    %995 = "ttir.matmul"(%arg69, %993, %994) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2588)
+    %996 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2589)
+    %997 = "ttir.transpose"(%995, %996) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2589)
+    %998 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2590)
+    %999 = "ttir.multiply"(%997, %arg70, %998) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2590)
+    %1000 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2591)
+    %1001 = "ttir.transpose"(%983, %1000) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2591)
+    %1002 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2592)
+    %1003 = "ttir.matmul"(%arg71, %1001, %1002) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2592)
+    %1004 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2593)
+    %1005 = "ttir.transpose"(%1003, %1004) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2593)
+    %1006 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2594)
+    %1007 = "ttir.concat"(%999, %1005, %1006) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2594)
+    %1008 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2595)
+    %1009 = "ttir.cos"(%985, %1008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2595)
+    %1010 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2596)
+    %1011 = "ttir.unsqueeze"(%1009, %1010) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2596)
+    %1012 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2597)
+    %1013 = "ttir.multiply"(%1007, %1011, %1012) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2597)
+    %1014 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2598)
+    %1015 = "ttir.add"(%991, %1013, %1014) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2598)
+    %1016 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2599)
+    %1017 = "ttir.squeeze"(%1015, %1016) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2599)
+    %1018 = tensor.empty() : tensor<12x3200xf32> loc(#loc2600)
+    %1019 = "ttir.matmul"(%977, %arg346, %1018) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2600)
+    %1020 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2601)
+    %1021 = "ttir.reshape"(%1019, %1020) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2601)
+    %1022 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2602)
+    %1023 = "ttir.transpose"(%1021, %1022) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2602)
+    %1024 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2603)
+    %1025 = "ttir.multiply"(%1023, %989, %1024) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2603)
+    %1026 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2604)
+    %1027 = "ttir.transpose"(%1023, %1026) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2604)
+    %1028 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2605)
+    %1029 = "ttir.matmul"(%arg72, %1027, %1028) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2605)
+    %1030 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2606)
+    %1031 = "ttir.transpose"(%1029, %1030) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2606)
+    %1032 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2607)
+    %1033 = "ttir.multiply"(%1031, %arg73, %1032) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2607)
+    %1034 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2608)
+    %1035 = "ttir.transpose"(%1023, %1034) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2608)
+    %1036 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2609)
+    %1037 = "ttir.matmul"(%arg74, %1035, %1036) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2609)
+    %1038 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2610)
+    %1039 = "ttir.transpose"(%1037, %1038) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2610)
+    %1040 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2611)
+    %1041 = "ttir.concat"(%1033, %1039, %1040) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2611)
+    %1042 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2612)
+    %1043 = "ttir.multiply"(%1041, %1011, %1042) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2612)
+    %1044 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2613)
+    %1045 = "ttir.add"(%1025, %1043, %1044) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2613)
+    %1046 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2614)
+    %1047 = "ttir.squeeze"(%1045, %1046) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2614)
+    %1048 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2615)
+    %1049 = "ttir.transpose"(%1047, %1048) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2615)
+    %1050 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2616)
+    %1051 = "ttir.matmul"(%1017, %1049, %1050) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2616)
+    %1052 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2617)
+    %1053 = "ttir.unsqueeze"(%1051, %1052) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2617)
+    %1054 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2618)
+    %1055 = "ttir.multiply"(%1053, %arg75, %1054) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2618)
+    %1056 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2619)
+    %1057 = "ttir.add"(%1055, %arg76, %1056) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2619)
+    %1058 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2620)
+    %1059 = "ttir.softmax"(%1057, %1058) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2620)
+    %1060 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2621)
+    %1061 = "ttir.squeeze"(%1059, %1060) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2621)
+    %1062 = tensor.empty() : tensor<12x3200xf32> loc(#loc2622)
+    %1063 = "ttir.matmul"(%977, %arg347, %1062) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2622)
+    %1064 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2623)
+    %1065 = "ttir.reshape"(%1063, %1064) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2623)
+    %1066 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2624)
+    %1067 = "ttir.transpose"(%1065, %1066) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2624)
+    %1068 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2625)
+    %1069 = "ttir.transpose"(%1067, %1068) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2625)
+    %1070 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2626)
+    %1071 = "ttir.squeeze"(%1069, %1070) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2626)
+    %1072 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2627)
+    %1073 = "ttir.transpose"(%1071, %1072) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2627)
+    %1074 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2628)
+    %1075 = "ttir.matmul"(%1061, %1073, %1074) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2628)
+    %1076 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2629)
+    %1077 = "ttir.unsqueeze"(%1075, %1076) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2629)
+    %1078 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2630)
+    %1079 = "ttir.transpose"(%1077, %1078) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2630)
+    %1080 = tensor.empty() : tensor<12x3200xf32> loc(#loc2631)
+    %1081 = "ttir.reshape"(%1079, %1080) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2631)
+    %1082 = tensor.empty() : tensor<12x3200xf32> loc(#loc2632)
+    %1083 = "ttir.matmul"(%1081, %arg348, %1082) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2632)
+    %1084 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2633)
+    %1085 = "ttir.unsqueeze"(%1083, %1084) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2633)
+    %1086 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2634)
+    %1087 = "ttir.add"(%961, %1085, %1086) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2634)
+    %1088 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2635)
+    %1089 = "ttir.multiply"(%1087, %1087, %1088) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2635)
+    %1090 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2636)
+    %1091 = "ttir.mean"(%1089, %1090) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2636)
+    %1092 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2637)
+    %1093 = "ttir.add"(%1091, %arg77, %1092) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2637)
+    %1094 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2638)
+    %1095 = "ttir.sqrt"(%1093, %1094) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2638)
+    %1096 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2639)
+    %1097 = "ttir.reciprocal"(%1095, %1096) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2639)
+    %1098 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2640)
+    %1099 = "ttir.multiply"(%1087, %1097, %1098) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2640)
+    %1100 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2641)
+    %1101 = "ttir.multiply"(%arg349, %1099, %1100) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2641)
+    %1102 = tensor.empty() : tensor<12x3200xf32> loc(#loc2642)
+    %1103 = "ttir.squeeze"(%1101, %1102) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2642)
+    %1104 = tensor.empty() : tensor<12x8640xf32> loc(#loc2643)
+    %1105 = "ttir.matmul"(%1103, %arg350, %1104) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2643)
+    %1106 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2644)
+    %1107 = "ttir.unsqueeze"(%1105, %1106) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2644)
+    %1108 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2645)
+    %1109 = "ttir.sigmoid"(%1107, %1108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2645)
+    %1110 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2646)
+    %1111 = "ttir.multiply"(%1107, %1109, %1110) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2646)
+    %1112 = tensor.empty() : tensor<12x8640xf32> loc(#loc2647)
+    %1113 = "ttir.matmul"(%1103, %arg351, %1112) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2647)
+    %1114 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2648)
+    %1115 = "ttir.unsqueeze"(%1113, %1114) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2648)
+    %1116 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2649)
+    %1117 = "ttir.multiply"(%1111, %1115, %1116) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2649)
+    %1118 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2650)
+    %1119 = "ttir.matmul"(%1117, %arg352, %1118) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2650)
+    %1120 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2651)
+    %1121 = "ttir.add"(%1087, %1119, %1120) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2651)
+    %1122 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2652)
+    %1123 = "ttir.multiply"(%1121, %1121, %1122) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2652)
+    %1124 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2653)
+    %1125 = "ttir.mean"(%1123, %1124) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2653)
+    %1126 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2654)
+    %1127 = "ttir.add"(%1125, %arg78, %1126) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2654)
+    %1128 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2655)
+    %1129 = "ttir.sqrt"(%1127, %1128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2655)
+    %1130 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2656)
+    %1131 = "ttir.reciprocal"(%1129, %1130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2656)
+    %1132 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2657)
+    %1133 = "ttir.multiply"(%1121, %1131, %1132) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2657)
+    %1134 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2658)
+    %1135 = "ttir.multiply"(%arg353, %1133, %1134) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2658)
+    %1136 = tensor.empty() : tensor<12x3200xf32> loc(#loc2659)
+    %1137 = "ttir.squeeze"(%1135, %1136) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2659)
+    %1138 = tensor.empty() : tensor<12x3200xf32> loc(#loc2660)
+    %1139 = "ttir.matmul"(%1137, %arg354, %1138) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2660)
+    %1140 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2661)
+    %1141 = "ttir.reshape"(%1139, %1140) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2661)
+    %1142 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2662)
+    %1143 = "ttir.transpose"(%1141, %1142) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2662)
+    %1144 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2663)
+    %1145 = "ttir.concat"(%arg79, %arg79, %1144) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2663)
+    %1146 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2664)
+    %1147 = "ttir.sin"(%1145, %1146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2664)
+    %1148 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2665)
+    %1149 = "ttir.unsqueeze"(%1147, %1148) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2665)
+    %1150 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2666)
+    %1151 = "ttir.multiply"(%1143, %1149, %1150) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2666)
+    %1152 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2667)
+    %1153 = "ttir.transpose"(%1143, %1152) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2667)
+    %1154 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2668)
+    %1155 = "ttir.matmul"(%arg80, %1153, %1154) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2668)
+    %1156 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2669)
+    %1157 = "ttir.transpose"(%1155, %1156) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2669)
+    %1158 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2670)
+    %1159 = "ttir.multiply"(%1157, %arg81, %1158) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2670)
+    %1160 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2671)
+    %1161 = "ttir.transpose"(%1143, %1160) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2671)
+    %1162 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2672)
+    %1163 = "ttir.matmul"(%arg82, %1161, %1162) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2672)
+    %1164 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2673)
+    %1165 = "ttir.transpose"(%1163, %1164) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2673)
+    %1166 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2674)
+    %1167 = "ttir.concat"(%1159, %1165, %1166) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2674)
+    %1168 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2675)
+    %1169 = "ttir.cos"(%1145, %1168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2675)
+    %1170 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2676)
+    %1171 = "ttir.unsqueeze"(%1169, %1170) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2676)
+    %1172 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2677)
+    %1173 = "ttir.multiply"(%1167, %1171, %1172) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2677)
+    %1174 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2678)
+    %1175 = "ttir.add"(%1151, %1173, %1174) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2678)
+    %1176 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2679)
+    %1177 = "ttir.squeeze"(%1175, %1176) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2679)
+    %1178 = tensor.empty() : tensor<12x3200xf32> loc(#loc2680)
+    %1179 = "ttir.matmul"(%1137, %arg355, %1178) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2680)
+    %1180 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2681)
+    %1181 = "ttir.reshape"(%1179, %1180) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2681)
+    %1182 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2682)
+    %1183 = "ttir.transpose"(%1181, %1182) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2682)
+    %1184 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2683)
+    %1185 = "ttir.multiply"(%1183, %1149, %1184) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2683)
+    %1186 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2684)
+    %1187 = "ttir.transpose"(%1183, %1186) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2684)
+    %1188 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2685)
+    %1189 = "ttir.matmul"(%arg83, %1187, %1188) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2685)
+    %1190 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2686)
+    %1191 = "ttir.transpose"(%1189, %1190) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2686)
+    %1192 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2687)
+    %1193 = "ttir.multiply"(%1191, %arg84, %1192) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2687)
+    %1194 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2688)
+    %1195 = "ttir.transpose"(%1183, %1194) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2688)
+    %1196 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2689)
+    %1197 = "ttir.matmul"(%arg85, %1195, %1196) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2689)
+    %1198 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2690)
+    %1199 = "ttir.transpose"(%1197, %1198) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2690)
+    %1200 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2691)
+    %1201 = "ttir.concat"(%1193, %1199, %1200) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2691)
+    %1202 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2692)
+    %1203 = "ttir.multiply"(%1201, %1171, %1202) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2692)
+    %1204 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2693)
+    %1205 = "ttir.add"(%1185, %1203, %1204) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2693)
+    %1206 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2694)
+    %1207 = "ttir.squeeze"(%1205, %1206) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2694)
+    %1208 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2695)
+    %1209 = "ttir.transpose"(%1207, %1208) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2695)
+    %1210 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2696)
+    %1211 = "ttir.matmul"(%1177, %1209, %1210) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2696)
+    %1212 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2697)
+    %1213 = "ttir.unsqueeze"(%1211, %1212) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2697)
+    %1214 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2698)
+    %1215 = "ttir.multiply"(%1213, %arg86, %1214) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2698)
+    %1216 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2699)
+    %1217 = "ttir.add"(%1215, %arg87, %1216) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2699)
+    %1218 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2700)
+    %1219 = "ttir.softmax"(%1217, %1218) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2700)
+    %1220 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2701)
+    %1221 = "ttir.squeeze"(%1219, %1220) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2701)
+    %1222 = tensor.empty() : tensor<12x3200xf32> loc(#loc2702)
+    %1223 = "ttir.matmul"(%1137, %arg356, %1222) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2702)
+    %1224 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2703)
+    %1225 = "ttir.reshape"(%1223, %1224) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2703)
+    %1226 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2704)
+    %1227 = "ttir.transpose"(%1225, %1226) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2704)
+    %1228 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2705)
+    %1229 = "ttir.transpose"(%1227, %1228) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2705)
+    %1230 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2706)
+    %1231 = "ttir.squeeze"(%1229, %1230) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2706)
+    %1232 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2707)
+    %1233 = "ttir.transpose"(%1231, %1232) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2707)
+    %1234 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2708)
+    %1235 = "ttir.matmul"(%1221, %1233, %1234) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2708)
+    %1236 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2709)
+    %1237 = "ttir.unsqueeze"(%1235, %1236) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2709)
+    %1238 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2710)
+    %1239 = "ttir.transpose"(%1237, %1238) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2710)
+    %1240 = tensor.empty() : tensor<12x3200xf32> loc(#loc2711)
+    %1241 = "ttir.reshape"(%1239, %1240) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2711)
+    %1242 = tensor.empty() : tensor<12x3200xf32> loc(#loc2712)
+    %1243 = "ttir.matmul"(%1241, %arg357, %1242) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2712)
+    %1244 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2713)
+    %1245 = "ttir.unsqueeze"(%1243, %1244) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2713)
+    %1246 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2714)
+    %1247 = "ttir.add"(%1121, %1245, %1246) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2714)
+    %1248 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2715)
+    %1249 = "ttir.multiply"(%1247, %1247, %1248) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2715)
+    %1250 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2716)
+    %1251 = "ttir.mean"(%1249, %1250) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2716)
+    %1252 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2717)
+    %1253 = "ttir.add"(%1251, %arg88, %1252) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2717)
+    %1254 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2718)
+    %1255 = "ttir.sqrt"(%1253, %1254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2718)
+    %1256 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2719)
+    %1257 = "ttir.reciprocal"(%1255, %1256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2719)
+    %1258 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2720)
+    %1259 = "ttir.multiply"(%1247, %1257, %1258) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2720)
+    %1260 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2721)
+    %1261 = "ttir.multiply"(%arg358, %1259, %1260) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2721)
+    %1262 = tensor.empty() : tensor<12x3200xf32> loc(#loc2722)
+    %1263 = "ttir.squeeze"(%1261, %1262) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2722)
+    %1264 = tensor.empty() : tensor<12x8640xf32> loc(#loc2723)
+    %1265 = "ttir.matmul"(%1263, %arg359, %1264) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2723)
+    %1266 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2724)
+    %1267 = "ttir.unsqueeze"(%1265, %1266) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2724)
+    %1268 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2725)
+    %1269 = "ttir.sigmoid"(%1267, %1268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2725)
+    %1270 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2726)
+    %1271 = "ttir.multiply"(%1267, %1269, %1270) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2726)
+    %1272 = tensor.empty() : tensor<12x8640xf32> loc(#loc2727)
+    %1273 = "ttir.matmul"(%1263, %arg360, %1272) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2727)
+    %1274 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2728)
+    %1275 = "ttir.unsqueeze"(%1273, %1274) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2728)
+    %1276 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2729)
+    %1277 = "ttir.multiply"(%1271, %1275, %1276) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2729)
+    %1278 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2730)
+    %1279 = "ttir.matmul"(%1277, %arg361, %1278) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2730)
+    %1280 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2731)
+    %1281 = "ttir.add"(%1247, %1279, %1280) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2731)
+    %1282 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2732)
+    %1283 = "ttir.multiply"(%1281, %1281, %1282) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2732)
+    %1284 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2733)
+    %1285 = "ttir.mean"(%1283, %1284) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2733)
+    %1286 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2734)
+    %1287 = "ttir.add"(%1285, %arg89, %1286) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2734)
+    %1288 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2735)
+    %1289 = "ttir.sqrt"(%1287, %1288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2735)
+    %1290 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2736)
+    %1291 = "ttir.reciprocal"(%1289, %1290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2736)
+    %1292 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2737)
+    %1293 = "ttir.multiply"(%1281, %1291, %1292) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2737)
+    %1294 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2738)
+    %1295 = "ttir.multiply"(%arg362, %1293, %1294) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2738)
+    %1296 = tensor.empty() : tensor<12x3200xf32> loc(#loc2739)
+    %1297 = "ttir.squeeze"(%1295, %1296) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2739)
+    %1298 = tensor.empty() : tensor<12x3200xf32> loc(#loc2740)
+    %1299 = "ttir.matmul"(%1297, %arg363, %1298) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2740)
+    %1300 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2741)
+    %1301 = "ttir.reshape"(%1299, %1300) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2741)
+    %1302 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2742)
+    %1303 = "ttir.transpose"(%1301, %1302) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2742)
+    %1304 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2743)
+    %1305 = "ttir.concat"(%arg90, %arg90, %1304) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2743)
+    %1306 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2744)
+    %1307 = "ttir.sin"(%1305, %1306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2744)
+    %1308 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2745)
+    %1309 = "ttir.unsqueeze"(%1307, %1308) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2745)
+    %1310 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2746)
+    %1311 = "ttir.multiply"(%1303, %1309, %1310) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2746)
+    %1312 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2747)
+    %1313 = "ttir.transpose"(%1303, %1312) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2747)
+    %1314 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2748)
+    %1315 = "ttir.matmul"(%arg91, %1313, %1314) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2748)
+    %1316 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2749)
+    %1317 = "ttir.transpose"(%1315, %1316) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2749)
+    %1318 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2750)
+    %1319 = "ttir.multiply"(%1317, %arg92, %1318) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2750)
+    %1320 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2751)
+    %1321 = "ttir.transpose"(%1303, %1320) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2751)
+    %1322 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2752)
+    %1323 = "ttir.matmul"(%arg93, %1321, %1322) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2752)
+    %1324 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2753)
+    %1325 = "ttir.transpose"(%1323, %1324) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2753)
+    %1326 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2754)
+    %1327 = "ttir.concat"(%1319, %1325, %1326) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2754)
+    %1328 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2755)
+    %1329 = "ttir.cos"(%1305, %1328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2755)
+    %1330 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2756)
+    %1331 = "ttir.unsqueeze"(%1329, %1330) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2756)
+    %1332 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2757)
+    %1333 = "ttir.multiply"(%1327, %1331, %1332) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2757)
+    %1334 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2758)
+    %1335 = "ttir.add"(%1311, %1333, %1334) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2758)
+    %1336 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2759)
+    %1337 = "ttir.squeeze"(%1335, %1336) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2759)
+    %1338 = tensor.empty() : tensor<12x3200xf32> loc(#loc2760)
+    %1339 = "ttir.matmul"(%1297, %arg364, %1338) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2760)
+    %1340 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2761)
+    %1341 = "ttir.reshape"(%1339, %1340) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2761)
+    %1342 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2762)
+    %1343 = "ttir.transpose"(%1341, %1342) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2762)
+    %1344 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2763)
+    %1345 = "ttir.multiply"(%1343, %1309, %1344) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2763)
+    %1346 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2764)
+    %1347 = "ttir.transpose"(%1343, %1346) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2764)
+    %1348 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2765)
+    %1349 = "ttir.matmul"(%arg94, %1347, %1348) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2765)
+    %1350 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2766)
+    %1351 = "ttir.transpose"(%1349, %1350) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2766)
+    %1352 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2767)
+    %1353 = "ttir.multiply"(%1351, %arg95, %1352) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2767)
+    %1354 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2768)
+    %1355 = "ttir.transpose"(%1343, %1354) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2768)
+    %1356 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2769)
+    %1357 = "ttir.matmul"(%arg96, %1355, %1356) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2769)
+    %1358 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2770)
+    %1359 = "ttir.transpose"(%1357, %1358) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2770)
+    %1360 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2771)
+    %1361 = "ttir.concat"(%1353, %1359, %1360) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2771)
+    %1362 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2772)
+    %1363 = "ttir.multiply"(%1361, %1331, %1362) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2772)
+    %1364 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2773)
+    %1365 = "ttir.add"(%1345, %1363, %1364) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2773)
+    %1366 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2774)
+    %1367 = "ttir.squeeze"(%1365, %1366) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2774)
+    %1368 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2775)
+    %1369 = "ttir.transpose"(%1367, %1368) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2775)
+    %1370 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2776)
+    %1371 = "ttir.matmul"(%1337, %1369, %1370) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2776)
+    %1372 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2777)
+    %1373 = "ttir.unsqueeze"(%1371, %1372) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2777)
+    %1374 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2778)
+    %1375 = "ttir.multiply"(%1373, %arg97, %1374) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2778)
+    %1376 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2779)
+    %1377 = "ttir.add"(%1375, %arg98, %1376) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2779)
+    %1378 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2780)
+    %1379 = "ttir.softmax"(%1377, %1378) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2780)
+    %1380 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2781)
+    %1381 = "ttir.squeeze"(%1379, %1380) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2781)
+    %1382 = tensor.empty() : tensor<12x3200xf32> loc(#loc2782)
+    %1383 = "ttir.matmul"(%1297, %arg365, %1382) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2782)
+    %1384 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2783)
+    %1385 = "ttir.reshape"(%1383, %1384) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2783)
+    %1386 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2784)
+    %1387 = "ttir.transpose"(%1385, %1386) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2784)
+    %1388 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2785)
+    %1389 = "ttir.transpose"(%1387, %1388) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2785)
+    %1390 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2786)
+    %1391 = "ttir.squeeze"(%1389, %1390) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2786)
+    %1392 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2787)
+    %1393 = "ttir.transpose"(%1391, %1392) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2787)
+    %1394 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2788)
+    %1395 = "ttir.matmul"(%1381, %1393, %1394) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2788)
+    %1396 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2789)
+    %1397 = "ttir.unsqueeze"(%1395, %1396) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2789)
+    %1398 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2790)
+    %1399 = "ttir.transpose"(%1397, %1398) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2790)
+    %1400 = tensor.empty() : tensor<12x3200xf32> loc(#loc2791)
+    %1401 = "ttir.reshape"(%1399, %1400) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2791)
+    %1402 = tensor.empty() : tensor<12x3200xf32> loc(#loc2792)
+    %1403 = "ttir.matmul"(%1401, %arg366, %1402) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2792)
+    %1404 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2793)
+    %1405 = "ttir.unsqueeze"(%1403, %1404) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2793)
+    %1406 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2794)
+    %1407 = "ttir.add"(%1281, %1405, %1406) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2794)
+    %1408 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2795)
+    %1409 = "ttir.multiply"(%1407, %1407, %1408) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2795)
+    %1410 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2796)
+    %1411 = "ttir.mean"(%1409, %1410) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2796)
+    %1412 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2797)
+    %1413 = "ttir.add"(%1411, %arg99, %1412) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2797)
+    %1414 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2798)
+    %1415 = "ttir.sqrt"(%1413, %1414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2798)
+    %1416 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2799)
+    %1417 = "ttir.reciprocal"(%1415, %1416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2799)
+    %1418 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2800)
+    %1419 = "ttir.multiply"(%1407, %1417, %1418) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2800)
+    %1420 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2801)
+    %1421 = "ttir.multiply"(%arg367, %1419, %1420) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2801)
+    %1422 = tensor.empty() : tensor<12x3200xf32> loc(#loc2802)
+    %1423 = "ttir.squeeze"(%1421, %1422) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2802)
+    %1424 = tensor.empty() : tensor<12x8640xf32> loc(#loc2803)
+    %1425 = "ttir.matmul"(%1423, %arg368, %1424) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2803)
+    %1426 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2804)
+    %1427 = "ttir.unsqueeze"(%1425, %1426) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2804)
+    %1428 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2805)
+    %1429 = "ttir.sigmoid"(%1427, %1428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2805)
+    %1430 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2806)
+    %1431 = "ttir.multiply"(%1427, %1429, %1430) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2806)
+    %1432 = tensor.empty() : tensor<12x8640xf32> loc(#loc2807)
+    %1433 = "ttir.matmul"(%1423, %arg369, %1432) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2807)
+    %1434 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2808)
+    %1435 = "ttir.unsqueeze"(%1433, %1434) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2808)
+    %1436 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2809)
+    %1437 = "ttir.multiply"(%1431, %1435, %1436) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2809)
+    %1438 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2810)
+    %1439 = "ttir.matmul"(%1437, %arg370, %1438) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2810)
+    %1440 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2811)
+    %1441 = "ttir.add"(%1407, %1439, %1440) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2811)
+    %1442 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2812)
+    %1443 = "ttir.multiply"(%1441, %1441, %1442) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2812)
+    %1444 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2813)
+    %1445 = "ttir.mean"(%1443, %1444) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2813)
+    %1446 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2814)
+    %1447 = "ttir.add"(%1445, %arg100, %1446) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2814)
+    %1448 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2815)
+    %1449 = "ttir.sqrt"(%1447, %1448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2815)
+    %1450 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2816)
+    %1451 = "ttir.reciprocal"(%1449, %1450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2816)
+    %1452 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2817)
+    %1453 = "ttir.multiply"(%1441, %1451, %1452) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2817)
+    %1454 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2818)
+    %1455 = "ttir.multiply"(%arg371, %1453, %1454) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2818)
+    %1456 = tensor.empty() : tensor<12x3200xf32> loc(#loc2819)
+    %1457 = "ttir.squeeze"(%1455, %1456) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2819)
+    %1458 = tensor.empty() : tensor<12x3200xf32> loc(#loc2820)
+    %1459 = "ttir.matmul"(%1457, %arg372, %1458) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2820)
+    %1460 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2821)
+    %1461 = "ttir.reshape"(%1459, %1460) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2821)
+    %1462 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2822)
+    %1463 = "ttir.transpose"(%1461, %1462) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2822)
+    %1464 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2823)
+    %1465 = "ttir.concat"(%arg101, %arg101, %1464) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2823)
+    %1466 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2824)
+    %1467 = "ttir.sin"(%1465, %1466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2824)
+    %1468 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2825)
+    %1469 = "ttir.unsqueeze"(%1467, %1468) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2825)
+    %1470 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2826)
+    %1471 = "ttir.multiply"(%1463, %1469, %1470) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2826)
+    %1472 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2827)
+    %1473 = "ttir.transpose"(%1463, %1472) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2827)
+    %1474 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2828)
+    %1475 = "ttir.matmul"(%arg102, %1473, %1474) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2828)
+    %1476 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2829)
+    %1477 = "ttir.transpose"(%1475, %1476) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2829)
+    %1478 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2830)
+    %1479 = "ttir.multiply"(%1477, %arg103, %1478) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2830)
+    %1480 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2831)
+    %1481 = "ttir.transpose"(%1463, %1480) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2831)
+    %1482 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2832)
+    %1483 = "ttir.matmul"(%arg104, %1481, %1482) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2832)
+    %1484 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2833)
+    %1485 = "ttir.transpose"(%1483, %1484) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2833)
+    %1486 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2834)
+    %1487 = "ttir.concat"(%1479, %1485, %1486) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2834)
+    %1488 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2835)
+    %1489 = "ttir.cos"(%1465, %1488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2835)
+    %1490 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2836)
+    %1491 = "ttir.unsqueeze"(%1489, %1490) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2836)
+    %1492 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2837)
+    %1493 = "ttir.multiply"(%1487, %1491, %1492) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2837)
+    %1494 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2838)
+    %1495 = "ttir.add"(%1471, %1493, %1494) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2838)
+    %1496 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2839)
+    %1497 = "ttir.squeeze"(%1495, %1496) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2839)
+    %1498 = tensor.empty() : tensor<12x3200xf32> loc(#loc2840)
+    %1499 = "ttir.matmul"(%1457, %arg373, %1498) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2840)
+    %1500 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2841)
+    %1501 = "ttir.reshape"(%1499, %1500) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2841)
+    %1502 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2842)
+    %1503 = "ttir.transpose"(%1501, %1502) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2842)
+    %1504 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2843)
+    %1505 = "ttir.multiply"(%1503, %1469, %1504) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2843)
+    %1506 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2844)
+    %1507 = "ttir.transpose"(%1503, %1506) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2844)
+    %1508 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2845)
+    %1509 = "ttir.matmul"(%arg105, %1507, %1508) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2845)
+    %1510 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2846)
+    %1511 = "ttir.transpose"(%1509, %1510) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2846)
+    %1512 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2847)
+    %1513 = "ttir.multiply"(%1511, %arg106, %1512) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2847)
+    %1514 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2848)
+    %1515 = "ttir.transpose"(%1503, %1514) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2848)
+    %1516 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2849)
+    %1517 = "ttir.matmul"(%arg107, %1515, %1516) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2849)
+    %1518 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2850)
+    %1519 = "ttir.transpose"(%1517, %1518) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2850)
+    %1520 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2851)
+    %1521 = "ttir.concat"(%1513, %1519, %1520) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2851)
+    %1522 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2852)
+    %1523 = "ttir.multiply"(%1521, %1491, %1522) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2852)
+    %1524 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2853)
+    %1525 = "ttir.add"(%1505, %1523, %1524) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2853)
+    %1526 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2854)
+    %1527 = "ttir.squeeze"(%1525, %1526) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2854)
+    %1528 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2855)
+    %1529 = "ttir.transpose"(%1527, %1528) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2855)
+    %1530 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2856)
+    %1531 = "ttir.matmul"(%1497, %1529, %1530) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2856)
+    %1532 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2857)
+    %1533 = "ttir.unsqueeze"(%1531, %1532) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2857)
+    %1534 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2858)
+    %1535 = "ttir.multiply"(%1533, %arg108, %1534) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2858)
+    %1536 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2859)
+    %1537 = "ttir.add"(%1535, %arg109, %1536) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2859)
+    %1538 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2860)
+    %1539 = "ttir.softmax"(%1537, %1538) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2860)
+    %1540 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2861)
+    %1541 = "ttir.squeeze"(%1539, %1540) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2861)
+    %1542 = tensor.empty() : tensor<12x3200xf32> loc(#loc2862)
+    %1543 = "ttir.matmul"(%1457, %arg374, %1542) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2862)
+    %1544 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2863)
+    %1545 = "ttir.reshape"(%1543, %1544) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2863)
+    %1546 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2864)
+    %1547 = "ttir.transpose"(%1545, %1546) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2864)
+    %1548 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2865)
+    %1549 = "ttir.transpose"(%1547, %1548) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2865)
+    %1550 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2866)
+    %1551 = "ttir.squeeze"(%1549, %1550) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2866)
+    %1552 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2867)
+    %1553 = "ttir.transpose"(%1551, %1552) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2867)
+    %1554 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2868)
+    %1555 = "ttir.matmul"(%1541, %1553, %1554) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2868)
+    %1556 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2869)
+    %1557 = "ttir.unsqueeze"(%1555, %1556) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2869)
+    %1558 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2870)
+    %1559 = "ttir.transpose"(%1557, %1558) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2870)
+    %1560 = tensor.empty() : tensor<12x3200xf32> loc(#loc2871)
+    %1561 = "ttir.reshape"(%1559, %1560) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2871)
+    %1562 = tensor.empty() : tensor<12x3200xf32> loc(#loc2872)
+    %1563 = "ttir.matmul"(%1561, %arg375, %1562) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2872)
+    %1564 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2873)
+    %1565 = "ttir.unsqueeze"(%1563, %1564) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2873)
+    %1566 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2874)
+    %1567 = "ttir.add"(%1441, %1565, %1566) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2874)
+    %1568 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2875)
+    %1569 = "ttir.multiply"(%1567, %1567, %1568) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2875)
+    %1570 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2876)
+    %1571 = "ttir.mean"(%1569, %1570) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2876)
+    %1572 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2877)
+    %1573 = "ttir.add"(%1571, %arg110, %1572) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2877)
+    %1574 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2878)
+    %1575 = "ttir.sqrt"(%1573, %1574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2878)
+    %1576 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2879)
+    %1577 = "ttir.reciprocal"(%1575, %1576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2879)
+    %1578 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2880)
+    %1579 = "ttir.multiply"(%1567, %1577, %1578) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2880)
+    %1580 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2881)
+    %1581 = "ttir.multiply"(%arg376, %1579, %1580) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2881)
+    %1582 = tensor.empty() : tensor<12x3200xf32> loc(#loc2882)
+    %1583 = "ttir.squeeze"(%1581, %1582) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2882)
+    %1584 = tensor.empty() : tensor<12x8640xf32> loc(#loc2883)
+    %1585 = "ttir.matmul"(%1583, %arg377, %1584) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2883)
+    %1586 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2884)
+    %1587 = "ttir.unsqueeze"(%1585, %1586) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2884)
+    %1588 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2885)
+    %1589 = "ttir.sigmoid"(%1587, %1588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2885)
+    %1590 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2886)
+    %1591 = "ttir.multiply"(%1587, %1589, %1590) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2886)
+    %1592 = tensor.empty() : tensor<12x8640xf32> loc(#loc2887)
+    %1593 = "ttir.matmul"(%1583, %arg378, %1592) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2887)
+    %1594 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2888)
+    %1595 = "ttir.unsqueeze"(%1593, %1594) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2888)
+    %1596 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2889)
+    %1597 = "ttir.multiply"(%1591, %1595, %1596) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2889)
+    %1598 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2890)
+    %1599 = "ttir.matmul"(%1597, %arg379, %1598) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2890)
+    %1600 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2891)
+    %1601 = "ttir.add"(%1567, %1599, %1600) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2891)
+    %1602 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2892)
+    %1603 = "ttir.multiply"(%1601, %1601, %1602) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2892)
+    %1604 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2893)
+    %1605 = "ttir.mean"(%1603, %1604) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2893)
+    %1606 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2894)
+    %1607 = "ttir.add"(%1605, %arg111, %1606) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2894)
+    %1608 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2895)
+    %1609 = "ttir.sqrt"(%1607, %1608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2895)
+    %1610 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2896)
+    %1611 = "ttir.reciprocal"(%1609, %1610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2896)
+    %1612 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2897)
+    %1613 = "ttir.multiply"(%1601, %1611, %1612) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2897)
+    %1614 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2898)
+    %1615 = "ttir.multiply"(%arg380, %1613, %1614) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2898)
+    %1616 = tensor.empty() : tensor<12x3200xf32> loc(#loc2899)
+    %1617 = "ttir.squeeze"(%1615, %1616) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2899)
+    %1618 = tensor.empty() : tensor<12x3200xf32> loc(#loc2900)
+    %1619 = "ttir.matmul"(%1617, %arg381, %1618) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2900)
+    %1620 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2901)
+    %1621 = "ttir.reshape"(%1619, %1620) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2901)
+    %1622 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2902)
+    %1623 = "ttir.transpose"(%1621, %1622) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2902)
+    %1624 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2903)
+    %1625 = "ttir.concat"(%arg112, %arg112, %1624) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2903)
+    %1626 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2904)
+    %1627 = "ttir.sin"(%1625, %1626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2904)
+    %1628 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2905)
+    %1629 = "ttir.unsqueeze"(%1627, %1628) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2905)
+    %1630 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2906)
+    %1631 = "ttir.multiply"(%1623, %1629, %1630) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2906)
+    %1632 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2907)
+    %1633 = "ttir.transpose"(%1623, %1632) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2907)
+    %1634 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2908)
+    %1635 = "ttir.matmul"(%arg113, %1633, %1634) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2908)
+    %1636 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2909)
+    %1637 = "ttir.transpose"(%1635, %1636) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2909)
+    %1638 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2910)
+    %1639 = "ttir.multiply"(%1637, %arg114, %1638) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2910)
+    %1640 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2911)
+    %1641 = "ttir.transpose"(%1623, %1640) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2911)
+    %1642 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2912)
+    %1643 = "ttir.matmul"(%arg115, %1641, %1642) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2912)
+    %1644 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2913)
+    %1645 = "ttir.transpose"(%1643, %1644) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2913)
+    %1646 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2914)
+    %1647 = "ttir.concat"(%1639, %1645, %1646) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2914)
+    %1648 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2915)
+    %1649 = "ttir.cos"(%1625, %1648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2915)
+    %1650 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2916)
+    %1651 = "ttir.unsqueeze"(%1649, %1650) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2916)
+    %1652 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2917)
+    %1653 = "ttir.multiply"(%1647, %1651, %1652) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2917)
+    %1654 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2918)
+    %1655 = "ttir.add"(%1631, %1653, %1654) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2918)
+    %1656 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2919)
+    %1657 = "ttir.squeeze"(%1655, %1656) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2919)
+    %1658 = tensor.empty() : tensor<12x3200xf32> loc(#loc2920)
+    %1659 = "ttir.matmul"(%1617, %arg382, %1658) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2920)
+    %1660 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2921)
+    %1661 = "ttir.reshape"(%1659, %1660) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2921)
+    %1662 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2922)
+    %1663 = "ttir.transpose"(%1661, %1662) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2922)
+    %1664 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2923)
+    %1665 = "ttir.multiply"(%1663, %1629, %1664) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2923)
+    %1666 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2924)
+    %1667 = "ttir.transpose"(%1663, %1666) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2924)
+    %1668 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2925)
+    %1669 = "ttir.matmul"(%arg116, %1667, %1668) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2925)
+    %1670 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2926)
+    %1671 = "ttir.transpose"(%1669, %1670) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2926)
+    %1672 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2927)
+    %1673 = "ttir.multiply"(%1671, %arg117, %1672) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2927)
+    %1674 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2928)
+    %1675 = "ttir.transpose"(%1663, %1674) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2928)
+    %1676 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2929)
+    %1677 = "ttir.matmul"(%arg118, %1675, %1676) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2929)
+    %1678 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2930)
+    %1679 = "ttir.transpose"(%1677, %1678) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2930)
+    %1680 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2931)
+    %1681 = "ttir.concat"(%1673, %1679, %1680) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2931)
+    %1682 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2932)
+    %1683 = "ttir.multiply"(%1681, %1651, %1682) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2932)
+    %1684 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2933)
+    %1685 = "ttir.add"(%1665, %1683, %1684) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2933)
+    %1686 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2934)
+    %1687 = "ttir.squeeze"(%1685, %1686) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2934)
+    %1688 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2935)
+    %1689 = "ttir.transpose"(%1687, %1688) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2935)
+    %1690 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2936)
+    %1691 = "ttir.matmul"(%1657, %1689, %1690) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2936)
+    %1692 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2937)
+    %1693 = "ttir.unsqueeze"(%1691, %1692) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2937)
+    %1694 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2938)
+    %1695 = "ttir.multiply"(%1693, %arg119, %1694) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2938)
+    %1696 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2939)
+    %1697 = "ttir.add"(%1695, %arg120, %1696) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2939)
+    %1698 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2940)
+    %1699 = "ttir.softmax"(%1697, %1698) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2940)
+    %1700 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2941)
+    %1701 = "ttir.squeeze"(%1699, %1700) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2941)
+    %1702 = tensor.empty() : tensor<12x3200xf32> loc(#loc2942)
+    %1703 = "ttir.matmul"(%1617, %arg383, %1702) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2942)
+    %1704 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2943)
+    %1705 = "ttir.reshape"(%1703, %1704) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2943)
+    %1706 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2944)
+    %1707 = "ttir.transpose"(%1705, %1706) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2944)
+    %1708 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2945)
+    %1709 = "ttir.transpose"(%1707, %1708) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2945)
+    %1710 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2946)
+    %1711 = "ttir.squeeze"(%1709, %1710) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2946)
+    %1712 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2947)
+    %1713 = "ttir.transpose"(%1711, %1712) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2947)
+    %1714 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2948)
+    %1715 = "ttir.matmul"(%1701, %1713, %1714) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2948)
+    %1716 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2949)
+    %1717 = "ttir.unsqueeze"(%1715, %1716) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2949)
+    %1718 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2950)
+    %1719 = "ttir.transpose"(%1717, %1718) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2950)
+    %1720 = tensor.empty() : tensor<12x3200xf32> loc(#loc2951)
+    %1721 = "ttir.reshape"(%1719, %1720) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2951)
+    %1722 = tensor.empty() : tensor<12x3200xf32> loc(#loc2952)
+    %1723 = "ttir.matmul"(%1721, %arg384, %1722) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2952)
+    %1724 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2953)
+    %1725 = "ttir.unsqueeze"(%1723, %1724) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2953)
+    %1726 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2954)
+    %1727 = "ttir.add"(%1601, %1725, %1726) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2954)
+    %1728 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2955)
+    %1729 = "ttir.multiply"(%1727, %1727, %1728) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2955)
+    %1730 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2956)
+    %1731 = "ttir.mean"(%1729, %1730) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2956)
+    %1732 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2957)
+    %1733 = "ttir.add"(%1731, %arg121, %1732) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2957)
+    %1734 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2958)
+    %1735 = "ttir.sqrt"(%1733, %1734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2958)
+    %1736 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2959)
+    %1737 = "ttir.reciprocal"(%1735, %1736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2959)
+    %1738 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2960)
+    %1739 = "ttir.multiply"(%1727, %1737, %1738) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2960)
+    %1740 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2961)
+    %1741 = "ttir.multiply"(%arg385, %1739, %1740) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2961)
+    %1742 = tensor.empty() : tensor<12x3200xf32> loc(#loc2962)
+    %1743 = "ttir.squeeze"(%1741, %1742) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2962)
+    %1744 = tensor.empty() : tensor<12x8640xf32> loc(#loc2963)
+    %1745 = "ttir.matmul"(%1743, %arg386, %1744) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2963)
+    %1746 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2964)
+    %1747 = "ttir.unsqueeze"(%1745, %1746) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2964)
+    %1748 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2965)
+    %1749 = "ttir.sigmoid"(%1747, %1748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2965)
+    %1750 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2966)
+    %1751 = "ttir.multiply"(%1747, %1749, %1750) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2966)
+    %1752 = tensor.empty() : tensor<12x8640xf32> loc(#loc2967)
+    %1753 = "ttir.matmul"(%1743, %arg387, %1752) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2967)
+    %1754 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2968)
+    %1755 = "ttir.unsqueeze"(%1753, %1754) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2968)
+    %1756 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2969)
+    %1757 = "ttir.multiply"(%1751, %1755, %1756) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2969)
+    %1758 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2970)
+    %1759 = "ttir.matmul"(%1757, %arg388, %1758) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2970)
+    %1760 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2971)
+    %1761 = "ttir.add"(%1727, %1759, %1760) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2971)
+    %1762 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2972)
+    %1763 = "ttir.multiply"(%1761, %1761, %1762) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2972)
+    %1764 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2973)
+    %1765 = "ttir.mean"(%1763, %1764) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2973)
+    %1766 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2974)
+    %1767 = "ttir.add"(%1765, %arg122, %1766) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2974)
+    %1768 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2975)
+    %1769 = "ttir.sqrt"(%1767, %1768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2975)
+    %1770 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2976)
+    %1771 = "ttir.reciprocal"(%1769, %1770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2976)
+    %1772 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2977)
+    %1773 = "ttir.multiply"(%1761, %1771, %1772) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2977)
+    %1774 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2978)
+    %1775 = "ttir.multiply"(%arg389, %1773, %1774) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2978)
+    %1776 = tensor.empty() : tensor<12x3200xf32> loc(#loc2979)
+    %1777 = "ttir.squeeze"(%1775, %1776) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2979)
+    %1778 = tensor.empty() : tensor<12x3200xf32> loc(#loc2980)
+    %1779 = "ttir.matmul"(%1777, %arg390, %1778) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2980)
+    %1780 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2981)
+    %1781 = "ttir.reshape"(%1779, %1780) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2981)
+    %1782 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2982)
+    %1783 = "ttir.transpose"(%1781, %1782) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2982)
+    %1784 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2983)
+    %1785 = "ttir.concat"(%arg123, %arg123, %1784) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2983)
+    %1786 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2984)
+    %1787 = "ttir.sin"(%1785, %1786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2984)
+    %1788 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2985)
+    %1789 = "ttir.unsqueeze"(%1787, %1788) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2985)
+    %1790 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2986)
+    %1791 = "ttir.multiply"(%1783, %1789, %1790) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2986)
+    %1792 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2987)
+    %1793 = "ttir.transpose"(%1783, %1792) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2987)
+    %1794 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2988)
+    %1795 = "ttir.matmul"(%arg124, %1793, %1794) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2988)
+    %1796 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2989)
+    %1797 = "ttir.transpose"(%1795, %1796) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2989)
+    %1798 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2990)
+    %1799 = "ttir.multiply"(%1797, %arg125, %1798) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2990)
+    %1800 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2991)
+    %1801 = "ttir.transpose"(%1783, %1800) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2991)
+    %1802 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2992)
+    %1803 = "ttir.matmul"(%arg126, %1801, %1802) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2992)
+    %1804 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2993)
+    %1805 = "ttir.transpose"(%1803, %1804) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2993)
+    %1806 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2994)
+    %1807 = "ttir.concat"(%1799, %1805, %1806) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2994)
+    %1808 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2995)
+    %1809 = "ttir.cos"(%1785, %1808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2995)
+    %1810 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2996)
+    %1811 = "ttir.unsqueeze"(%1809, %1810) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2996)
+    %1812 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2997)
+    %1813 = "ttir.multiply"(%1807, %1811, %1812) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2997)
+    %1814 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2998)
+    %1815 = "ttir.add"(%1791, %1813, %1814) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2998)
+    %1816 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2999)
+    %1817 = "ttir.squeeze"(%1815, %1816) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2999)
+    %1818 = tensor.empty() : tensor<12x3200xf32> loc(#loc3000)
+    %1819 = "ttir.matmul"(%1777, %arg391, %1818) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3000)
+    %1820 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3001)
+    %1821 = "ttir.reshape"(%1819, %1820) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3001)
+    %1822 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3002)
+    %1823 = "ttir.transpose"(%1821, %1822) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3002)
+    %1824 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3003)
+    %1825 = "ttir.multiply"(%1823, %1789, %1824) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3003)
+    %1826 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3004)
+    %1827 = "ttir.transpose"(%1823, %1826) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3004)
+    %1828 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3005)
+    %1829 = "ttir.matmul"(%arg127, %1827, %1828) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3005)
+    %1830 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3006)
+    %1831 = "ttir.transpose"(%1829, %1830) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3006)
+    %1832 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3007)
+    %1833 = "ttir.multiply"(%1831, %arg128, %1832) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3007)
+    %1834 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3008)
+    %1835 = "ttir.transpose"(%1823, %1834) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3008)
+    %1836 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3009)
+    %1837 = "ttir.matmul"(%arg129, %1835, %1836) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3009)
+    %1838 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3010)
+    %1839 = "ttir.transpose"(%1837, %1838) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3010)
+    %1840 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3011)
+    %1841 = "ttir.concat"(%1833, %1839, %1840) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3011)
+    %1842 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3012)
+    %1843 = "ttir.multiply"(%1841, %1811, %1842) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3012)
+    %1844 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3013)
+    %1845 = "ttir.add"(%1825, %1843, %1844) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3013)
+    %1846 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3014)
+    %1847 = "ttir.squeeze"(%1845, %1846) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3014)
+    %1848 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3015)
+    %1849 = "ttir.transpose"(%1847, %1848) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3015)
+    %1850 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3016)
+    %1851 = "ttir.matmul"(%1817, %1849, %1850) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3016)
+    %1852 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3017)
+    %1853 = "ttir.unsqueeze"(%1851, %1852) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3017)
+    %1854 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3018)
+    %1855 = "ttir.multiply"(%1853, %arg130, %1854) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3018)
+    %1856 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3019)
+    %1857 = "ttir.add"(%1855, %arg131, %1856) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3019)
+    %1858 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3020)
+    %1859 = "ttir.softmax"(%1857, %1858) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3020)
+    %1860 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3021)
+    %1861 = "ttir.squeeze"(%1859, %1860) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3021)
+    %1862 = tensor.empty() : tensor<12x3200xf32> loc(#loc3022)
+    %1863 = "ttir.matmul"(%1777, %arg392, %1862) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3022)
+    %1864 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3023)
+    %1865 = "ttir.reshape"(%1863, %1864) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3023)
+    %1866 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3024)
+    %1867 = "ttir.transpose"(%1865, %1866) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3024)
+    %1868 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3025)
+    %1869 = "ttir.transpose"(%1867, %1868) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3025)
+    %1870 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3026)
+    %1871 = "ttir.squeeze"(%1869, %1870) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3026)
+    %1872 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3027)
+    %1873 = "ttir.transpose"(%1871, %1872) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3027)
+    %1874 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3028)
+    %1875 = "ttir.matmul"(%1861, %1873, %1874) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3028)
+    %1876 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3029)
+    %1877 = "ttir.unsqueeze"(%1875, %1876) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3029)
+    %1878 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3030)
+    %1879 = "ttir.transpose"(%1877, %1878) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3030)
+    %1880 = tensor.empty() : tensor<12x3200xf32> loc(#loc3031)
+    %1881 = "ttir.reshape"(%1879, %1880) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3031)
+    %1882 = tensor.empty() : tensor<12x3200xf32> loc(#loc3032)
+    %1883 = "ttir.matmul"(%1881, %arg393, %1882) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3032)
+    %1884 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3033)
+    %1885 = "ttir.unsqueeze"(%1883, %1884) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3033)
+    %1886 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3034)
+    %1887 = "ttir.add"(%1761, %1885, %1886) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3034)
+    %1888 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3035)
+    %1889 = "ttir.multiply"(%1887, %1887, %1888) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3035)
+    %1890 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3036)
+    %1891 = "ttir.mean"(%1889, %1890) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3036)
+    %1892 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3037)
+    %1893 = "ttir.add"(%1891, %arg132, %1892) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3037)
+    %1894 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3038)
+    %1895 = "ttir.sqrt"(%1893, %1894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3038)
+    %1896 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3039)
+    %1897 = "ttir.reciprocal"(%1895, %1896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3039)
+    %1898 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3040)
+    %1899 = "ttir.multiply"(%1887, %1897, %1898) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3040)
+    %1900 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3041)
+    %1901 = "ttir.multiply"(%arg394, %1899, %1900) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3041)
+    %1902 = tensor.empty() : tensor<12x3200xf32> loc(#loc3042)
+    %1903 = "ttir.squeeze"(%1901, %1902) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3042)
+    %1904 = tensor.empty() : tensor<12x8640xf32> loc(#loc3043)
+    %1905 = "ttir.matmul"(%1903, %arg395, %1904) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3043)
+    %1906 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3044)
+    %1907 = "ttir.unsqueeze"(%1905, %1906) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3044)
+    %1908 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3045)
+    %1909 = "ttir.sigmoid"(%1907, %1908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3045)
+    %1910 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3046)
+    %1911 = "ttir.multiply"(%1907, %1909, %1910) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3046)
+    %1912 = tensor.empty() : tensor<12x8640xf32> loc(#loc3047)
+    %1913 = "ttir.matmul"(%1903, %arg396, %1912) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3047)
+    %1914 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3048)
+    %1915 = "ttir.unsqueeze"(%1913, %1914) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3048)
+    %1916 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3049)
+    %1917 = "ttir.multiply"(%1911, %1915, %1916) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3049)
+    %1918 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3050)
+    %1919 = "ttir.matmul"(%1917, %arg397, %1918) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3050)
+    %1920 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3051)
+    %1921 = "ttir.add"(%1887, %1919, %1920) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3051)
+    %1922 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3052)
+    %1923 = "ttir.multiply"(%1921, %1921, %1922) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3052)
+    %1924 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3053)
+    %1925 = "ttir.mean"(%1923, %1924) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3053)
+    %1926 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3054)
+    %1927 = "ttir.add"(%1925, %arg133, %1926) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3054)
+    %1928 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3055)
+    %1929 = "ttir.sqrt"(%1927, %1928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3055)
+    %1930 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3056)
+    %1931 = "ttir.reciprocal"(%1929, %1930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3056)
+    %1932 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3057)
+    %1933 = "ttir.multiply"(%1921, %1931, %1932) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3057)
+    %1934 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3058)
+    %1935 = "ttir.multiply"(%arg398, %1933, %1934) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3058)
+    %1936 = tensor.empty() : tensor<12x3200xf32> loc(#loc3059)
+    %1937 = "ttir.squeeze"(%1935, %1936) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3059)
+    %1938 = tensor.empty() : tensor<12x3200xf32> loc(#loc3060)
+    %1939 = "ttir.matmul"(%1937, %arg399, %1938) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3060)
+    %1940 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3061)
+    %1941 = "ttir.reshape"(%1939, %1940) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3061)
+    %1942 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3062)
+    %1943 = "ttir.transpose"(%1941, %1942) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3062)
+    %1944 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3063)
+    %1945 = "ttir.concat"(%arg134, %arg134, %1944) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3063)
+    %1946 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3064)
+    %1947 = "ttir.sin"(%1945, %1946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3064)
+    %1948 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3065)
+    %1949 = "ttir.unsqueeze"(%1947, %1948) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3065)
+    %1950 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3066)
+    %1951 = "ttir.multiply"(%1943, %1949, %1950) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3066)
+    %1952 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3067)
+    %1953 = "ttir.transpose"(%1943, %1952) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3067)
+    %1954 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3068)
+    %1955 = "ttir.matmul"(%arg135, %1953, %1954) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3068)
+    %1956 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3069)
+    %1957 = "ttir.transpose"(%1955, %1956) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3069)
+    %1958 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3070)
+    %1959 = "ttir.multiply"(%1957, %arg136, %1958) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3070)
+    %1960 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3071)
+    %1961 = "ttir.transpose"(%1943, %1960) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3071)
+    %1962 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3072)
+    %1963 = "ttir.matmul"(%arg137, %1961, %1962) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3072)
+    %1964 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3073)
+    %1965 = "ttir.transpose"(%1963, %1964) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3073)
+    %1966 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3074)
+    %1967 = "ttir.concat"(%1959, %1965, %1966) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3074)
+    %1968 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3075)
+    %1969 = "ttir.cos"(%1945, %1968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3075)
+    %1970 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3076)
+    %1971 = "ttir.unsqueeze"(%1969, %1970) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3076)
+    %1972 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3077)
+    %1973 = "ttir.multiply"(%1967, %1971, %1972) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3077)
+    %1974 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3078)
+    %1975 = "ttir.add"(%1951, %1973, %1974) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3078)
+    %1976 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3079)
+    %1977 = "ttir.squeeze"(%1975, %1976) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3079)
+    %1978 = tensor.empty() : tensor<12x3200xf32> loc(#loc3080)
+    %1979 = "ttir.matmul"(%1937, %arg400, %1978) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3080)
+    %1980 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3081)
+    %1981 = "ttir.reshape"(%1979, %1980) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3081)
+    %1982 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3082)
+    %1983 = "ttir.transpose"(%1981, %1982) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3082)
+    %1984 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3083)
+    %1985 = "ttir.multiply"(%1983, %1949, %1984) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3083)
+    %1986 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3084)
+    %1987 = "ttir.transpose"(%1983, %1986) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3084)
+    %1988 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3085)
+    %1989 = "ttir.matmul"(%arg138, %1987, %1988) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3085)
+    %1990 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3086)
+    %1991 = "ttir.transpose"(%1989, %1990) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3086)
+    %1992 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3087)
+    %1993 = "ttir.multiply"(%1991, %arg139, %1992) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3087)
+    %1994 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3088)
+    %1995 = "ttir.transpose"(%1983, %1994) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3088)
+    %1996 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3089)
+    %1997 = "ttir.matmul"(%arg140, %1995, %1996) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3089)
+    %1998 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3090)
+    %1999 = "ttir.transpose"(%1997, %1998) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3090)
+    %2000 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3091)
+    %2001 = "ttir.concat"(%1993, %1999, %2000) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3091)
+    %2002 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3092)
+    %2003 = "ttir.multiply"(%2001, %1971, %2002) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3092)
+    %2004 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3093)
+    %2005 = "ttir.add"(%1985, %2003, %2004) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3093)
+    %2006 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3094)
+    %2007 = "ttir.squeeze"(%2005, %2006) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3094)
+    %2008 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3095)
+    %2009 = "ttir.transpose"(%2007, %2008) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3095)
+    %2010 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3096)
+    %2011 = "ttir.matmul"(%1977, %2009, %2010) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3096)
+    %2012 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3097)
+    %2013 = "ttir.unsqueeze"(%2011, %2012) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3097)
+    %2014 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3098)
+    %2015 = "ttir.multiply"(%2013, %arg141, %2014) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3098)
+    %2016 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3099)
+    %2017 = "ttir.add"(%2015, %arg142, %2016) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3099)
+    %2018 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3100)
+    %2019 = "ttir.softmax"(%2017, %2018) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3100)
+    %2020 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3101)
+    %2021 = "ttir.squeeze"(%2019, %2020) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3101)
+    %2022 = tensor.empty() : tensor<12x3200xf32> loc(#loc3102)
+    %2023 = "ttir.matmul"(%1937, %arg401, %2022) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3102)
+    %2024 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3103)
+    %2025 = "ttir.reshape"(%2023, %2024) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3103)
+    %2026 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3104)
+    %2027 = "ttir.transpose"(%2025, %2026) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3104)
+    %2028 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3105)
+    %2029 = "ttir.transpose"(%2027, %2028) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3105)
+    %2030 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3106)
+    %2031 = "ttir.squeeze"(%2029, %2030) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3106)
+    %2032 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3107)
+    %2033 = "ttir.transpose"(%2031, %2032) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3107)
+    %2034 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3108)
+    %2035 = "ttir.matmul"(%2021, %2033, %2034) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3108)
+    %2036 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3109)
+    %2037 = "ttir.unsqueeze"(%2035, %2036) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3109)
+    %2038 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3110)
+    %2039 = "ttir.transpose"(%2037, %2038) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3110)
+    %2040 = tensor.empty() : tensor<12x3200xf32> loc(#loc3111)
+    %2041 = "ttir.reshape"(%2039, %2040) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3111)
+    %2042 = tensor.empty() : tensor<12x3200xf32> loc(#loc3112)
+    %2043 = "ttir.matmul"(%2041, %arg402, %2042) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3112)
+    %2044 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3113)
+    %2045 = "ttir.unsqueeze"(%2043, %2044) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3113)
+    %2046 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3114)
+    %2047 = "ttir.add"(%1921, %2045, %2046) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3114)
+    %2048 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3115)
+    %2049 = "ttir.multiply"(%2047, %2047, %2048) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3115)
+    %2050 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3116)
+    %2051 = "ttir.mean"(%2049, %2050) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3116)
+    %2052 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3117)
+    %2053 = "ttir.add"(%2051, %arg143, %2052) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3117)
+    %2054 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3118)
+    %2055 = "ttir.sqrt"(%2053, %2054) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3118)
+    %2056 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3119)
+    %2057 = "ttir.reciprocal"(%2055, %2056) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3119)
+    %2058 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3120)
+    %2059 = "ttir.multiply"(%2047, %2057, %2058) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3120)
+    %2060 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3121)
+    %2061 = "ttir.multiply"(%arg403, %2059, %2060) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3121)
+    %2062 = tensor.empty() : tensor<12x3200xf32> loc(#loc3122)
+    %2063 = "ttir.squeeze"(%2061, %2062) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3122)
+    %2064 = tensor.empty() : tensor<12x8640xf32> loc(#loc3123)
+    %2065 = "ttir.matmul"(%2063, %arg404, %2064) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3123)
+    %2066 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3124)
+    %2067 = "ttir.unsqueeze"(%2065, %2066) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3124)
+    %2068 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3125)
+    %2069 = "ttir.sigmoid"(%2067, %2068) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3125)
+    %2070 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3126)
+    %2071 = "ttir.multiply"(%2067, %2069, %2070) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3126)
+    %2072 = tensor.empty() : tensor<12x8640xf32> loc(#loc3127)
+    %2073 = "ttir.matmul"(%2063, %arg405, %2072) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3127)
+    %2074 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3128)
+    %2075 = "ttir.unsqueeze"(%2073, %2074) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3128)
+    %2076 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3129)
+    %2077 = "ttir.multiply"(%2071, %2075, %2076) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3129)
+    %2078 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3130)
+    %2079 = "ttir.matmul"(%2077, %arg406, %2078) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3130)
+    %2080 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3131)
+    %2081 = "ttir.add"(%2047, %2079, %2080) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3131)
+    %2082 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3132)
+    %2083 = "ttir.multiply"(%2081, %2081, %2082) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3132)
+    %2084 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3133)
+    %2085 = "ttir.mean"(%2083, %2084) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3133)
+    %2086 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3134)
+    %2087 = "ttir.add"(%2085, %arg144, %2086) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3134)
+    %2088 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3135)
+    %2089 = "ttir.sqrt"(%2087, %2088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3135)
+    %2090 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3136)
+    %2091 = "ttir.reciprocal"(%2089, %2090) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3136)
+    %2092 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3137)
+    %2093 = "ttir.multiply"(%2081, %2091, %2092) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3137)
+    %2094 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3138)
+    %2095 = "ttir.multiply"(%arg407, %2093, %2094) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3138)
+    %2096 = tensor.empty() : tensor<12x3200xf32> loc(#loc3139)
+    %2097 = "ttir.squeeze"(%2095, %2096) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3139)
+    %2098 = tensor.empty() : tensor<12x3200xf32> loc(#loc3140)
+    %2099 = "ttir.matmul"(%2097, %arg408, %2098) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3140)
+    %2100 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3141)
+    %2101 = "ttir.reshape"(%2099, %2100) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3141)
+    %2102 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3142)
+    %2103 = "ttir.transpose"(%2101, %2102) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3142)
+    %2104 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3143)
+    %2105 = "ttir.concat"(%arg145, %arg145, %2104) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3143)
+    %2106 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3144)
+    %2107 = "ttir.sin"(%2105, %2106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3144)
+    %2108 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3145)
+    %2109 = "ttir.unsqueeze"(%2107, %2108) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3145)
+    %2110 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3146)
+    %2111 = "ttir.multiply"(%2103, %2109, %2110) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3146)
+    %2112 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3147)
+    %2113 = "ttir.transpose"(%2103, %2112) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3147)
+    %2114 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3148)
+    %2115 = "ttir.matmul"(%arg146, %2113, %2114) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3148)
+    %2116 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3149)
+    %2117 = "ttir.transpose"(%2115, %2116) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3149)
+    %2118 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3150)
+    %2119 = "ttir.multiply"(%2117, %arg147, %2118) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3150)
+    %2120 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3151)
+    %2121 = "ttir.transpose"(%2103, %2120) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3151)
+    %2122 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3152)
+    %2123 = "ttir.matmul"(%arg148, %2121, %2122) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3152)
+    %2124 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3153)
+    %2125 = "ttir.transpose"(%2123, %2124) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3153)
+    %2126 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3154)
+    %2127 = "ttir.concat"(%2119, %2125, %2126) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3154)
+    %2128 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3155)
+    %2129 = "ttir.cos"(%2105, %2128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3155)
+    %2130 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3156)
+    %2131 = "ttir.unsqueeze"(%2129, %2130) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3156)
+    %2132 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3157)
+    %2133 = "ttir.multiply"(%2127, %2131, %2132) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3157)
+    %2134 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3158)
+    %2135 = "ttir.add"(%2111, %2133, %2134) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3158)
+    %2136 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3159)
+    %2137 = "ttir.squeeze"(%2135, %2136) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3159)
+    %2138 = tensor.empty() : tensor<12x3200xf32> loc(#loc3160)
+    %2139 = "ttir.matmul"(%2097, %arg409, %2138) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3160)
+    %2140 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3161)
+    %2141 = "ttir.reshape"(%2139, %2140) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3161)
+    %2142 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3162)
+    %2143 = "ttir.transpose"(%2141, %2142) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3162)
+    %2144 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3163)
+    %2145 = "ttir.multiply"(%2143, %2109, %2144) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3163)
+    %2146 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3164)
+    %2147 = "ttir.transpose"(%2143, %2146) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3164)
+    %2148 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3165)
+    %2149 = "ttir.matmul"(%arg149, %2147, %2148) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3165)
+    %2150 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3166)
+    %2151 = "ttir.transpose"(%2149, %2150) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3166)
+    %2152 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3167)
+    %2153 = "ttir.multiply"(%2151, %arg150, %2152) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3167)
+    %2154 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3168)
+    %2155 = "ttir.transpose"(%2143, %2154) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3168)
+    %2156 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3169)
+    %2157 = "ttir.matmul"(%arg151, %2155, %2156) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3169)
+    %2158 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3170)
+    %2159 = "ttir.transpose"(%2157, %2158) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3170)
+    %2160 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3171)
+    %2161 = "ttir.concat"(%2153, %2159, %2160) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3171)
+    %2162 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3172)
+    %2163 = "ttir.multiply"(%2161, %2131, %2162) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3172)
+    %2164 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3173)
+    %2165 = "ttir.add"(%2145, %2163, %2164) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3173)
+    %2166 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3174)
+    %2167 = "ttir.squeeze"(%2165, %2166) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3174)
+    %2168 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3175)
+    %2169 = "ttir.transpose"(%2167, %2168) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3175)
+    %2170 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3176)
+    %2171 = "ttir.matmul"(%2137, %2169, %2170) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3176)
+    %2172 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3177)
+    %2173 = "ttir.unsqueeze"(%2171, %2172) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3177)
+    %2174 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3178)
+    %2175 = "ttir.multiply"(%2173, %arg152, %2174) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3178)
+    %2176 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3179)
+    %2177 = "ttir.add"(%2175, %arg153, %2176) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3179)
+    %2178 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3180)
+    %2179 = "ttir.softmax"(%2177, %2178) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3180)
+    %2180 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3181)
+    %2181 = "ttir.squeeze"(%2179, %2180) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3181)
+    %2182 = tensor.empty() : tensor<12x3200xf32> loc(#loc3182)
+    %2183 = "ttir.matmul"(%2097, %arg410, %2182) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3182)
+    %2184 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3183)
+    %2185 = "ttir.reshape"(%2183, %2184) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3183)
+    %2186 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3184)
+    %2187 = "ttir.transpose"(%2185, %2186) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3184)
+    %2188 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3185)
+    %2189 = "ttir.transpose"(%2187, %2188) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3185)
+    %2190 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3186)
+    %2191 = "ttir.squeeze"(%2189, %2190) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3186)
+    %2192 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3187)
+    %2193 = "ttir.transpose"(%2191, %2192) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3187)
+    %2194 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3188)
+    %2195 = "ttir.matmul"(%2181, %2193, %2194) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3188)
+    %2196 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3189)
+    %2197 = "ttir.unsqueeze"(%2195, %2196) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3189)
+    %2198 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3190)
+    %2199 = "ttir.transpose"(%2197, %2198) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3190)
+    %2200 = tensor.empty() : tensor<12x3200xf32> loc(#loc3191)
+    %2201 = "ttir.reshape"(%2199, %2200) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3191)
+    %2202 = tensor.empty() : tensor<12x3200xf32> loc(#loc3192)
+    %2203 = "ttir.matmul"(%2201, %arg411, %2202) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3192)
+    %2204 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3193)
+    %2205 = "ttir.unsqueeze"(%2203, %2204) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3193)
+    %2206 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3194)
+    %2207 = "ttir.add"(%2081, %2205, %2206) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3194)
+    %2208 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3195)
+    %2209 = "ttir.multiply"(%2207, %2207, %2208) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3195)
+    %2210 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3196)
+    %2211 = "ttir.mean"(%2209, %2210) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3196)
+    %2212 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3197)
+    %2213 = "ttir.add"(%2211, %arg154, %2212) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3197)
+    %2214 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3198)
+    %2215 = "ttir.sqrt"(%2213, %2214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3198)
+    %2216 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3199)
+    %2217 = "ttir.reciprocal"(%2215, %2216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3199)
+    %2218 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3200)
+    %2219 = "ttir.multiply"(%2207, %2217, %2218) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3200)
+    %2220 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3201)
+    %2221 = "ttir.multiply"(%arg412, %2219, %2220) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3201)
+    %2222 = tensor.empty() : tensor<12x3200xf32> loc(#loc3202)
+    %2223 = "ttir.squeeze"(%2221, %2222) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3202)
+    %2224 = tensor.empty() : tensor<12x8640xf32> loc(#loc3203)
+    %2225 = "ttir.matmul"(%2223, %arg413, %2224) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3203)
+    %2226 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3204)
+    %2227 = "ttir.unsqueeze"(%2225, %2226) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3204)
+    %2228 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3205)
+    %2229 = "ttir.sigmoid"(%2227, %2228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3205)
+    %2230 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3206)
+    %2231 = "ttir.multiply"(%2227, %2229, %2230) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3206)
+    %2232 = tensor.empty() : tensor<12x8640xf32> loc(#loc3207)
+    %2233 = "ttir.matmul"(%2223, %arg414, %2232) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3207)
+    %2234 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3208)
+    %2235 = "ttir.unsqueeze"(%2233, %2234) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3208)
+    %2236 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3209)
+    %2237 = "ttir.multiply"(%2231, %2235, %2236) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3209)
+    %2238 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3210)
+    %2239 = "ttir.matmul"(%2237, %arg415, %2238) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3210)
+    %2240 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3211)
+    %2241 = "ttir.add"(%2207, %2239, %2240) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3211)
+    %2242 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3212)
+    %2243 = "ttir.multiply"(%2241, %2241, %2242) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3212)
+    %2244 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3213)
+    %2245 = "ttir.mean"(%2243, %2244) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3213)
+    %2246 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3214)
+    %2247 = "ttir.add"(%2245, %arg155, %2246) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3214)
+    %2248 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3215)
+    %2249 = "ttir.sqrt"(%2247, %2248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3215)
+    %2250 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3216)
+    %2251 = "ttir.reciprocal"(%2249, %2250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3216)
+    %2252 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3217)
+    %2253 = "ttir.multiply"(%2241, %2251, %2252) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3217)
+    %2254 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3218)
+    %2255 = "ttir.multiply"(%arg416, %2253, %2254) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3218)
+    %2256 = tensor.empty() : tensor<12x3200xf32> loc(#loc3219)
+    %2257 = "ttir.squeeze"(%2255, %2256) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3219)
+    %2258 = tensor.empty() : tensor<12x3200xf32> loc(#loc3220)
+    %2259 = "ttir.matmul"(%2257, %arg417, %2258) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3220)
+    %2260 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3221)
+    %2261 = "ttir.reshape"(%2259, %2260) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3221)
+    %2262 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3222)
+    %2263 = "ttir.transpose"(%2261, %2262) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3222)
+    %2264 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3223)
+    %2265 = "ttir.concat"(%arg156, %arg156, %2264) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3223)
+    %2266 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3224)
+    %2267 = "ttir.sin"(%2265, %2266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3224)
+    %2268 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3225)
+    %2269 = "ttir.unsqueeze"(%2267, %2268) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3225)
+    %2270 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3226)
+    %2271 = "ttir.multiply"(%2263, %2269, %2270) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3226)
+    %2272 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3227)
+    %2273 = "ttir.transpose"(%2263, %2272) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3227)
+    %2274 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3228)
+    %2275 = "ttir.matmul"(%arg157, %2273, %2274) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3228)
+    %2276 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3229)
+    %2277 = "ttir.transpose"(%2275, %2276) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3229)
+    %2278 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3230)
+    %2279 = "ttir.multiply"(%2277, %arg158, %2278) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3230)
+    %2280 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3231)
+    %2281 = "ttir.transpose"(%2263, %2280) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3231)
+    %2282 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3232)
+    %2283 = "ttir.matmul"(%arg159, %2281, %2282) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3232)
+    %2284 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3233)
+    %2285 = "ttir.transpose"(%2283, %2284) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3233)
+    %2286 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3234)
+    %2287 = "ttir.concat"(%2279, %2285, %2286) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3234)
+    %2288 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3235)
+    %2289 = "ttir.cos"(%2265, %2288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3235)
+    %2290 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3236)
+    %2291 = "ttir.unsqueeze"(%2289, %2290) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3236)
+    %2292 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3237)
+    %2293 = "ttir.multiply"(%2287, %2291, %2292) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3237)
+    %2294 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3238)
+    %2295 = "ttir.add"(%2271, %2293, %2294) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3238)
+    %2296 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3239)
+    %2297 = "ttir.squeeze"(%2295, %2296) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3239)
+    %2298 = tensor.empty() : tensor<12x3200xf32> loc(#loc3240)
+    %2299 = "ttir.matmul"(%2257, %arg418, %2298) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3240)
+    %2300 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3241)
+    %2301 = "ttir.reshape"(%2299, %2300) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3241)
+    %2302 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3242)
+    %2303 = "ttir.transpose"(%2301, %2302) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3242)
+    %2304 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3243)
+    %2305 = "ttir.multiply"(%2303, %2269, %2304) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3243)
+    %2306 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3244)
+    %2307 = "ttir.transpose"(%2303, %2306) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3244)
+    %2308 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3245)
+    %2309 = "ttir.matmul"(%arg160, %2307, %2308) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3245)
+    %2310 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3246)
+    %2311 = "ttir.transpose"(%2309, %2310) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3246)
+    %2312 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3247)
+    %2313 = "ttir.multiply"(%2311, %arg161, %2312) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3247)
+    %2314 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3248)
+    %2315 = "ttir.transpose"(%2303, %2314) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3248)
+    %2316 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3249)
+    %2317 = "ttir.matmul"(%arg162, %2315, %2316) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3249)
+    %2318 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3250)
+    %2319 = "ttir.transpose"(%2317, %2318) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3250)
+    %2320 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3251)
+    %2321 = "ttir.concat"(%2313, %2319, %2320) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3251)
+    %2322 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3252)
+    %2323 = "ttir.multiply"(%2321, %2291, %2322) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3252)
+    %2324 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3253)
+    %2325 = "ttir.add"(%2305, %2323, %2324) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3253)
+    %2326 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3254)
+    %2327 = "ttir.squeeze"(%2325, %2326) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3254)
+    %2328 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3255)
+    %2329 = "ttir.transpose"(%2327, %2328) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3255)
+    %2330 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3256)
+    %2331 = "ttir.matmul"(%2297, %2329, %2330) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3256)
+    %2332 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3257)
+    %2333 = "ttir.unsqueeze"(%2331, %2332) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3257)
+    %2334 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3258)
+    %2335 = "ttir.multiply"(%2333, %arg163, %2334) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3258)
+    %2336 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3259)
+    %2337 = "ttir.add"(%2335, %arg164, %2336) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3259)
+    %2338 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3260)
+    %2339 = "ttir.softmax"(%2337, %2338) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3260)
+    %2340 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3261)
+    %2341 = "ttir.squeeze"(%2339, %2340) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3261)
+    %2342 = tensor.empty() : tensor<12x3200xf32> loc(#loc3262)
+    %2343 = "ttir.matmul"(%2257, %arg419, %2342) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3262)
+    %2344 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3263)
+    %2345 = "ttir.reshape"(%2343, %2344) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3263)
+    %2346 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3264)
+    %2347 = "ttir.transpose"(%2345, %2346) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3264)
+    %2348 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3265)
+    %2349 = "ttir.transpose"(%2347, %2348) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3265)
+    %2350 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3266)
+    %2351 = "ttir.squeeze"(%2349, %2350) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3266)
+    %2352 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3267)
+    %2353 = "ttir.transpose"(%2351, %2352) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3267)
+    %2354 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3268)
+    %2355 = "ttir.matmul"(%2341, %2353, %2354) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3268)
+    %2356 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3269)
+    %2357 = "ttir.unsqueeze"(%2355, %2356) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3269)
+    %2358 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3270)
+    %2359 = "ttir.transpose"(%2357, %2358) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3270)
+    %2360 = tensor.empty() : tensor<12x3200xf32> loc(#loc3271)
+    %2361 = "ttir.reshape"(%2359, %2360) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3271)
+    %2362 = tensor.empty() : tensor<12x3200xf32> loc(#loc3272)
+    %2363 = "ttir.matmul"(%2361, %arg420, %2362) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3272)
+    %2364 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3273)
+    %2365 = "ttir.unsqueeze"(%2363, %2364) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3273)
+    %2366 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3274)
+    %2367 = "ttir.add"(%2241, %2365, %2366) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3274)
+    %2368 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3275)
+    %2369 = "ttir.multiply"(%2367, %2367, %2368) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3275)
+    %2370 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3276)
+    %2371 = "ttir.mean"(%2369, %2370) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3276)
+    %2372 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3277)
+    %2373 = "ttir.add"(%2371, %arg165, %2372) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3277)
+    %2374 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3278)
+    %2375 = "ttir.sqrt"(%2373, %2374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3278)
+    %2376 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3279)
+    %2377 = "ttir.reciprocal"(%2375, %2376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3279)
+    %2378 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3280)
+    %2379 = "ttir.multiply"(%2367, %2377, %2378) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3280)
+    %2380 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3281)
+    %2381 = "ttir.multiply"(%arg421, %2379, %2380) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3281)
+    %2382 = tensor.empty() : tensor<12x3200xf32> loc(#loc3282)
+    %2383 = "ttir.squeeze"(%2381, %2382) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3282)
+    %2384 = tensor.empty() : tensor<12x8640xf32> loc(#loc3283)
+    %2385 = "ttir.matmul"(%2383, %arg422, %2384) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3283)
+    %2386 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3284)
+    %2387 = "ttir.unsqueeze"(%2385, %2386) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3284)
+    %2388 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3285)
+    %2389 = "ttir.sigmoid"(%2387, %2388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3285)
+    %2390 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3286)
+    %2391 = "ttir.multiply"(%2387, %2389, %2390) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3286)
+    %2392 = tensor.empty() : tensor<12x8640xf32> loc(#loc3287)
+    %2393 = "ttir.matmul"(%2383, %arg423, %2392) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3287)
+    %2394 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3288)
+    %2395 = "ttir.unsqueeze"(%2393, %2394) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3288)
+    %2396 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3289)
+    %2397 = "ttir.multiply"(%2391, %2395, %2396) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3289)
+    %2398 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3290)
+    %2399 = "ttir.matmul"(%2397, %arg424, %2398) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3290)
+    %2400 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3291)
+    %2401 = "ttir.add"(%2367, %2399, %2400) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3291)
+    %2402 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3292)
+    %2403 = "ttir.multiply"(%2401, %2401, %2402) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3292)
+    %2404 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3293)
+    %2405 = "ttir.mean"(%2403, %2404) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3293)
+    %2406 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3294)
+    %2407 = "ttir.add"(%2405, %arg166, %2406) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3294)
+    %2408 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3295)
+    %2409 = "ttir.sqrt"(%2407, %2408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3295)
+    %2410 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3296)
+    %2411 = "ttir.reciprocal"(%2409, %2410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3296)
+    %2412 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3297)
+    %2413 = "ttir.multiply"(%2401, %2411, %2412) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3297)
+    %2414 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3298)
+    %2415 = "ttir.multiply"(%arg425, %2413, %2414) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3298)
+    %2416 = tensor.empty() : tensor<12x3200xf32> loc(#loc3299)
+    %2417 = "ttir.squeeze"(%2415, %2416) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3299)
+    %2418 = tensor.empty() : tensor<12x3200xf32> loc(#loc3300)
+    %2419 = "ttir.matmul"(%2417, %arg426, %2418) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3300)
+    %2420 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3301)
+    %2421 = "ttir.reshape"(%2419, %2420) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3301)
+    %2422 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3302)
+    %2423 = "ttir.transpose"(%2421, %2422) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3302)
+    %2424 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3303)
+    %2425 = "ttir.concat"(%arg167, %arg167, %2424) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3303)
+    %2426 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3304)
+    %2427 = "ttir.sin"(%2425, %2426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3304)
+    %2428 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3305)
+    %2429 = "ttir.unsqueeze"(%2427, %2428) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3305)
+    %2430 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3306)
+    %2431 = "ttir.multiply"(%2423, %2429, %2430) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3306)
+    %2432 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3307)
+    %2433 = "ttir.transpose"(%2423, %2432) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3307)
+    %2434 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3308)
+    %2435 = "ttir.matmul"(%arg168, %2433, %2434) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3308)
+    %2436 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3309)
+    %2437 = "ttir.transpose"(%2435, %2436) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3309)
+    %2438 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3310)
+    %2439 = "ttir.multiply"(%2437, %arg169, %2438) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3310)
+    %2440 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3311)
+    %2441 = "ttir.transpose"(%2423, %2440) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3311)
+    %2442 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3312)
+    %2443 = "ttir.matmul"(%arg170, %2441, %2442) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3312)
+    %2444 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3313)
+    %2445 = "ttir.transpose"(%2443, %2444) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3313)
+    %2446 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3314)
+    %2447 = "ttir.concat"(%2439, %2445, %2446) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3314)
+    %2448 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3315)
+    %2449 = "ttir.cos"(%2425, %2448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3315)
+    %2450 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3316)
+    %2451 = "ttir.unsqueeze"(%2449, %2450) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3316)
+    %2452 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3317)
+    %2453 = "ttir.multiply"(%2447, %2451, %2452) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3317)
+    %2454 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3318)
+    %2455 = "ttir.add"(%2431, %2453, %2454) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3318)
+    %2456 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3319)
+    %2457 = "ttir.squeeze"(%2455, %2456) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3319)
+    %2458 = tensor.empty() : tensor<12x3200xf32> loc(#loc3320)
+    %2459 = "ttir.matmul"(%2417, %arg427, %2458) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3320)
+    %2460 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3321)
+    %2461 = "ttir.reshape"(%2459, %2460) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3321)
+    %2462 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3322)
+    %2463 = "ttir.transpose"(%2461, %2462) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3322)
+    %2464 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3323)
+    %2465 = "ttir.multiply"(%2463, %2429, %2464) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3323)
+    %2466 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3324)
+    %2467 = "ttir.transpose"(%2463, %2466) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3324)
+    %2468 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3325)
+    %2469 = "ttir.matmul"(%arg171, %2467, %2468) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3325)
+    %2470 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3326)
+    %2471 = "ttir.transpose"(%2469, %2470) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3326)
+    %2472 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3327)
+    %2473 = "ttir.multiply"(%2471, %arg172, %2472) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3327)
+    %2474 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3328)
+    %2475 = "ttir.transpose"(%2463, %2474) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3328)
+    %2476 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3329)
+    %2477 = "ttir.matmul"(%arg173, %2475, %2476) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3329)
+    %2478 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3330)
+    %2479 = "ttir.transpose"(%2477, %2478) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3330)
+    %2480 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3331)
+    %2481 = "ttir.concat"(%2473, %2479, %2480) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3331)
+    %2482 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3332)
+    %2483 = "ttir.multiply"(%2481, %2451, %2482) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3332)
+    %2484 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3333)
+    %2485 = "ttir.add"(%2465, %2483, %2484) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3333)
+    %2486 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3334)
+    %2487 = "ttir.squeeze"(%2485, %2486) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3334)
+    %2488 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3335)
+    %2489 = "ttir.transpose"(%2487, %2488) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3335)
+    %2490 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3336)
+    %2491 = "ttir.matmul"(%2457, %2489, %2490) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3336)
+    %2492 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3337)
+    %2493 = "ttir.unsqueeze"(%2491, %2492) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3337)
+    %2494 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3338)
+    %2495 = "ttir.multiply"(%2493, %arg174, %2494) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3338)
+    %2496 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3339)
+    %2497 = "ttir.add"(%2495, %arg175, %2496) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3339)
+    %2498 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3340)
+    %2499 = "ttir.softmax"(%2497, %2498) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3340)
+    %2500 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3341)
+    %2501 = "ttir.squeeze"(%2499, %2500) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3341)
+    %2502 = tensor.empty() : tensor<12x3200xf32> loc(#loc3342)
+    %2503 = "ttir.matmul"(%2417, %arg428, %2502) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3342)
+    %2504 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3343)
+    %2505 = "ttir.reshape"(%2503, %2504) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3343)
+    %2506 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3344)
+    %2507 = "ttir.transpose"(%2505, %2506) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3344)
+    %2508 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3345)
+    %2509 = "ttir.transpose"(%2507, %2508) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3345)
+    %2510 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3346)
+    %2511 = "ttir.squeeze"(%2509, %2510) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3346)
+    %2512 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3347)
+    %2513 = "ttir.transpose"(%2511, %2512) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3347)
+    %2514 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3348)
+    %2515 = "ttir.matmul"(%2501, %2513, %2514) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3348)
+    %2516 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3349)
+    %2517 = "ttir.unsqueeze"(%2515, %2516) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3349)
+    %2518 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3350)
+    %2519 = "ttir.transpose"(%2517, %2518) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3350)
+    %2520 = tensor.empty() : tensor<12x3200xf32> loc(#loc3351)
+    %2521 = "ttir.reshape"(%2519, %2520) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3351)
+    %2522 = tensor.empty() : tensor<12x3200xf32> loc(#loc3352)
+    %2523 = "ttir.matmul"(%2521, %arg429, %2522) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3352)
+    %2524 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3353)
+    %2525 = "ttir.unsqueeze"(%2523, %2524) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3353)
+    %2526 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3354)
+    %2527 = "ttir.add"(%2401, %2525, %2526) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3354)
+    %2528 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3355)
+    %2529 = "ttir.multiply"(%2527, %2527, %2528) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3355)
+    %2530 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3356)
+    %2531 = "ttir.mean"(%2529, %2530) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3356)
+    %2532 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3357)
+    %2533 = "ttir.add"(%2531, %arg176, %2532) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3357)
+    %2534 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3358)
+    %2535 = "ttir.sqrt"(%2533, %2534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3358)
+    %2536 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3359)
+    %2537 = "ttir.reciprocal"(%2535, %2536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3359)
+    %2538 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3360)
+    %2539 = "ttir.multiply"(%2527, %2537, %2538) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3360)
+    %2540 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3361)
+    %2541 = "ttir.multiply"(%arg430, %2539, %2540) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3361)
+    %2542 = tensor.empty() : tensor<12x3200xf32> loc(#loc3362)
+    %2543 = "ttir.squeeze"(%2541, %2542) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3362)
+    %2544 = tensor.empty() : tensor<12x8640xf32> loc(#loc3363)
+    %2545 = "ttir.matmul"(%2543, %arg431, %2544) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3363)
+    %2546 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3364)
+    %2547 = "ttir.unsqueeze"(%2545, %2546) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3364)
+    %2548 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3365)
+    %2549 = "ttir.sigmoid"(%2547, %2548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3365)
+    %2550 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3366)
+    %2551 = "ttir.multiply"(%2547, %2549, %2550) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3366)
+    %2552 = tensor.empty() : tensor<12x8640xf32> loc(#loc3367)
+    %2553 = "ttir.matmul"(%2543, %arg432, %2552) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3367)
+    %2554 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3368)
+    %2555 = "ttir.unsqueeze"(%2553, %2554) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3368)
+    %2556 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3369)
+    %2557 = "ttir.multiply"(%2551, %2555, %2556) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3369)
+    %2558 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3370)
+    %2559 = "ttir.matmul"(%2557, %arg433, %2558) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3370)
+    %2560 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3371)
+    %2561 = "ttir.add"(%2527, %2559, %2560) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3371)
+    %2562 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3372)
+    %2563 = "ttir.multiply"(%2561, %2561, %2562) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3372)
+    %2564 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3373)
+    %2565 = "ttir.mean"(%2563, %2564) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3373)
+    %2566 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3374)
+    %2567 = "ttir.add"(%2565, %arg177, %2566) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3374)
+    %2568 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3375)
+    %2569 = "ttir.sqrt"(%2567, %2568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3375)
+    %2570 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3376)
+    %2571 = "ttir.reciprocal"(%2569, %2570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3376)
+    %2572 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3377)
+    %2573 = "ttir.multiply"(%2561, %2571, %2572) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3377)
+    %2574 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3378)
+    %2575 = "ttir.multiply"(%arg434, %2573, %2574) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3378)
+    %2576 = tensor.empty() : tensor<12x3200xf32> loc(#loc3379)
+    %2577 = "ttir.squeeze"(%2575, %2576) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3379)
+    %2578 = tensor.empty() : tensor<12x3200xf32> loc(#loc3380)
+    %2579 = "ttir.matmul"(%2577, %arg435, %2578) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3380)
+    %2580 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3381)
+    %2581 = "ttir.reshape"(%2579, %2580) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3381)
+    %2582 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3382)
+    %2583 = "ttir.transpose"(%2581, %2582) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3382)
+    %2584 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3383)
+    %2585 = "ttir.concat"(%arg178, %arg178, %2584) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3383)
+    %2586 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3384)
+    %2587 = "ttir.sin"(%2585, %2586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3384)
+    %2588 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3385)
+    %2589 = "ttir.unsqueeze"(%2587, %2588) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3385)
+    %2590 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3386)
+    %2591 = "ttir.multiply"(%2583, %2589, %2590) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3386)
+    %2592 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3387)
+    %2593 = "ttir.transpose"(%2583, %2592) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3387)
+    %2594 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3388)
+    %2595 = "ttir.matmul"(%arg179, %2593, %2594) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3388)
+    %2596 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3389)
+    %2597 = "ttir.transpose"(%2595, %2596) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3389)
+    %2598 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3390)
+    %2599 = "ttir.multiply"(%2597, %arg180, %2598) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3390)
+    %2600 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3391)
+    %2601 = "ttir.transpose"(%2583, %2600) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3391)
+    %2602 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3392)
+    %2603 = "ttir.matmul"(%arg181, %2601, %2602) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3392)
+    %2604 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3393)
+    %2605 = "ttir.transpose"(%2603, %2604) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3393)
+    %2606 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3394)
+    %2607 = "ttir.concat"(%2599, %2605, %2606) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3394)
+    %2608 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3395)
+    %2609 = "ttir.cos"(%2585, %2608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3395)
+    %2610 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3396)
+    %2611 = "ttir.unsqueeze"(%2609, %2610) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3396)
+    %2612 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3397)
+    %2613 = "ttir.multiply"(%2607, %2611, %2612) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3397)
+    %2614 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3398)
+    %2615 = "ttir.add"(%2591, %2613, %2614) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3398)
+    %2616 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3399)
+    %2617 = "ttir.squeeze"(%2615, %2616) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3399)
+    %2618 = tensor.empty() : tensor<12x3200xf32> loc(#loc3400)
+    %2619 = "ttir.matmul"(%2577, %arg436, %2618) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3400)
+    %2620 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3401)
+    %2621 = "ttir.reshape"(%2619, %2620) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3401)
+    %2622 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3402)
+    %2623 = "ttir.transpose"(%2621, %2622) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3402)
+    %2624 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3403)
+    %2625 = "ttir.multiply"(%2623, %2589, %2624) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3403)
+    %2626 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3404)
+    %2627 = "ttir.transpose"(%2623, %2626) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3404)
+    %2628 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3405)
+    %2629 = "ttir.matmul"(%arg182, %2627, %2628) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3405)
+    %2630 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3406)
+    %2631 = "ttir.transpose"(%2629, %2630) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3406)
+    %2632 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3407)
+    %2633 = "ttir.multiply"(%2631, %arg183, %2632) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3407)
+    %2634 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3408)
+    %2635 = "ttir.transpose"(%2623, %2634) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3408)
+    %2636 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3409)
+    %2637 = "ttir.matmul"(%arg184, %2635, %2636) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3409)
+    %2638 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3410)
+    %2639 = "ttir.transpose"(%2637, %2638) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3410)
+    %2640 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3411)
+    %2641 = "ttir.concat"(%2633, %2639, %2640) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3411)
+    %2642 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3412)
+    %2643 = "ttir.multiply"(%2641, %2611, %2642) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3412)
+    %2644 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3413)
+    %2645 = "ttir.add"(%2625, %2643, %2644) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3413)
+    %2646 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3414)
+    %2647 = "ttir.squeeze"(%2645, %2646) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3414)
+    %2648 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3415)
+    %2649 = "ttir.transpose"(%2647, %2648) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3415)
+    %2650 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3416)
+    %2651 = "ttir.matmul"(%2617, %2649, %2650) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3416)
+    %2652 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3417)
+    %2653 = "ttir.unsqueeze"(%2651, %2652) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3417)
+    %2654 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3418)
+    %2655 = "ttir.multiply"(%2653, %arg185, %2654) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3418)
+    %2656 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3419)
+    %2657 = "ttir.add"(%2655, %arg186, %2656) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3419)
+    %2658 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3420)
+    %2659 = "ttir.softmax"(%2657, %2658) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3420)
+    %2660 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3421)
+    %2661 = "ttir.squeeze"(%2659, %2660) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3421)
+    %2662 = tensor.empty() : tensor<12x3200xf32> loc(#loc3422)
+    %2663 = "ttir.matmul"(%2577, %arg437, %2662) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3422)
+    %2664 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3423)
+    %2665 = "ttir.reshape"(%2663, %2664) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3423)
+    %2666 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3424)
+    %2667 = "ttir.transpose"(%2665, %2666) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3424)
+    %2668 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3425)
+    %2669 = "ttir.transpose"(%2667, %2668) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3425)
+    %2670 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3426)
+    %2671 = "ttir.squeeze"(%2669, %2670) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3426)
+    %2672 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3427)
+    %2673 = "ttir.transpose"(%2671, %2672) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3427)
+    %2674 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3428)
+    %2675 = "ttir.matmul"(%2661, %2673, %2674) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3428)
+    %2676 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3429)
+    %2677 = "ttir.unsqueeze"(%2675, %2676) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3429)
+    %2678 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3430)
+    %2679 = "ttir.transpose"(%2677, %2678) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3430)
+    %2680 = tensor.empty() : tensor<12x3200xf32> loc(#loc3431)
+    %2681 = "ttir.reshape"(%2679, %2680) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3431)
+    %2682 = tensor.empty() : tensor<12x3200xf32> loc(#loc3432)
+    %2683 = "ttir.matmul"(%2681, %arg438, %2682) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3432)
+    %2684 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3433)
+    %2685 = "ttir.unsqueeze"(%2683, %2684) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3433)
+    %2686 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3434)
+    %2687 = "ttir.add"(%2561, %2685, %2686) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3434)
+    %2688 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3435)
+    %2689 = "ttir.multiply"(%2687, %2687, %2688) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3435)
+    %2690 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3436)
+    %2691 = "ttir.mean"(%2689, %2690) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3436)
+    %2692 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3437)
+    %2693 = "ttir.add"(%2691, %arg187, %2692) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3437)
+    %2694 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3438)
+    %2695 = "ttir.sqrt"(%2693, %2694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3438)
+    %2696 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3439)
+    %2697 = "ttir.reciprocal"(%2695, %2696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3439)
+    %2698 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3440)
+    %2699 = "ttir.multiply"(%2687, %2697, %2698) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3440)
+    %2700 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3441)
+    %2701 = "ttir.multiply"(%arg439, %2699, %2700) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3441)
+    %2702 = tensor.empty() : tensor<12x3200xf32> loc(#loc3442)
+    %2703 = "ttir.squeeze"(%2701, %2702) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3442)
+    %2704 = tensor.empty() : tensor<12x8640xf32> loc(#loc3443)
+    %2705 = "ttir.matmul"(%2703, %arg440, %2704) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3443)
+    %2706 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3444)
+    %2707 = "ttir.unsqueeze"(%2705, %2706) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3444)
+    %2708 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3445)
+    %2709 = "ttir.sigmoid"(%2707, %2708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3445)
+    %2710 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3446)
+    %2711 = "ttir.multiply"(%2707, %2709, %2710) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3446)
+    %2712 = tensor.empty() : tensor<12x8640xf32> loc(#loc3447)
+    %2713 = "ttir.matmul"(%2703, %arg441, %2712) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3447)
+    %2714 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3448)
+    %2715 = "ttir.unsqueeze"(%2713, %2714) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3448)
+    %2716 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3449)
+    %2717 = "ttir.multiply"(%2711, %2715, %2716) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3449)
+    %2718 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3450)
+    %2719 = "ttir.matmul"(%2717, %arg442, %2718) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3450)
+    %2720 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3451)
+    %2721 = "ttir.add"(%2687, %2719, %2720) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3451)
+    %2722 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3452)
+    %2723 = "ttir.multiply"(%2721, %2721, %2722) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3452)
+    %2724 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3453)
+    %2725 = "ttir.mean"(%2723, %2724) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3453)
+    %2726 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3454)
+    %2727 = "ttir.add"(%2725, %arg188, %2726) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3454)
+    %2728 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3455)
+    %2729 = "ttir.sqrt"(%2727, %2728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3455)
+    %2730 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3456)
+    %2731 = "ttir.reciprocal"(%2729, %2730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3456)
+    %2732 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3457)
+    %2733 = "ttir.multiply"(%2721, %2731, %2732) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3457)
+    %2734 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3458)
+    %2735 = "ttir.multiply"(%arg443, %2733, %2734) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3458)
+    %2736 = tensor.empty() : tensor<12x3200xf32> loc(#loc3459)
+    %2737 = "ttir.squeeze"(%2735, %2736) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3459)
+    %2738 = tensor.empty() : tensor<12x3200xf32> loc(#loc3460)
+    %2739 = "ttir.matmul"(%2737, %arg444, %2738) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3460)
+    %2740 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3461)
+    %2741 = "ttir.reshape"(%2739, %2740) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3461)
+    %2742 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3462)
+    %2743 = "ttir.transpose"(%2741, %2742) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3462)
+    %2744 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3463)
+    %2745 = "ttir.concat"(%arg189, %arg189, %2744) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3463)
+    %2746 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3464)
+    %2747 = "ttir.sin"(%2745, %2746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3464)
+    %2748 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3465)
+    %2749 = "ttir.unsqueeze"(%2747, %2748) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3465)
+    %2750 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3466)
+    %2751 = "ttir.multiply"(%2743, %2749, %2750) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3466)
+    %2752 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3467)
+    %2753 = "ttir.transpose"(%2743, %2752) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3467)
+    %2754 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3468)
+    %2755 = "ttir.matmul"(%arg190, %2753, %2754) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3468)
+    %2756 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3469)
+    %2757 = "ttir.transpose"(%2755, %2756) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3469)
+    %2758 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3470)
+    %2759 = "ttir.multiply"(%2757, %arg191, %2758) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3470)
+    %2760 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3471)
+    %2761 = "ttir.transpose"(%2743, %2760) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3471)
+    %2762 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3472)
+    %2763 = "ttir.matmul"(%arg192, %2761, %2762) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3472)
+    %2764 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3473)
+    %2765 = "ttir.transpose"(%2763, %2764) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3473)
+    %2766 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3474)
+    %2767 = "ttir.concat"(%2759, %2765, %2766) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3474)
+    %2768 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3475)
+    %2769 = "ttir.cos"(%2745, %2768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3475)
+    %2770 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3476)
+    %2771 = "ttir.unsqueeze"(%2769, %2770) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3476)
+    %2772 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3477)
+    %2773 = "ttir.multiply"(%2767, %2771, %2772) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3477)
+    %2774 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3478)
+    %2775 = "ttir.add"(%2751, %2773, %2774) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3478)
+    %2776 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3479)
+    %2777 = "ttir.squeeze"(%2775, %2776) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3479)
+    %2778 = tensor.empty() : tensor<12x3200xf32> loc(#loc3480)
+    %2779 = "ttir.matmul"(%2737, %arg445, %2778) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3480)
+    %2780 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3481)
+    %2781 = "ttir.reshape"(%2779, %2780) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3481)
+    %2782 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3482)
+    %2783 = "ttir.transpose"(%2781, %2782) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3482)
+    %2784 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3483)
+    %2785 = "ttir.multiply"(%2783, %2749, %2784) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3483)
+    %2786 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3484)
+    %2787 = "ttir.transpose"(%2783, %2786) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3484)
+    %2788 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3485)
+    %2789 = "ttir.matmul"(%arg193, %2787, %2788) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3485)
+    %2790 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3486)
+    %2791 = "ttir.transpose"(%2789, %2790) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3486)
+    %2792 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3487)
+    %2793 = "ttir.multiply"(%2791, %arg194, %2792) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3487)
+    %2794 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3488)
+    %2795 = "ttir.transpose"(%2783, %2794) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3488)
+    %2796 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3489)
+    %2797 = "ttir.matmul"(%arg195, %2795, %2796) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3489)
+    %2798 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3490)
+    %2799 = "ttir.transpose"(%2797, %2798) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3490)
+    %2800 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3491)
+    %2801 = "ttir.concat"(%2793, %2799, %2800) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3491)
+    %2802 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3492)
+    %2803 = "ttir.multiply"(%2801, %2771, %2802) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3492)
+    %2804 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3493)
+    %2805 = "ttir.add"(%2785, %2803, %2804) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3493)
+    %2806 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3494)
+    %2807 = "ttir.squeeze"(%2805, %2806) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3494)
+    %2808 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3495)
+    %2809 = "ttir.transpose"(%2807, %2808) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3495)
+    %2810 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3496)
+    %2811 = "ttir.matmul"(%2777, %2809, %2810) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3496)
+    %2812 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3497)
+    %2813 = "ttir.unsqueeze"(%2811, %2812) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3497)
+    %2814 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3498)
+    %2815 = "ttir.multiply"(%2813, %arg196, %2814) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3498)
+    %2816 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3499)
+    %2817 = "ttir.add"(%2815, %arg197, %2816) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3499)
+    %2818 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3500)
+    %2819 = "ttir.softmax"(%2817, %2818) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3500)
+    %2820 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3501)
+    %2821 = "ttir.squeeze"(%2819, %2820) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3501)
+    %2822 = tensor.empty() : tensor<12x3200xf32> loc(#loc3502)
+    %2823 = "ttir.matmul"(%2737, %arg446, %2822) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3502)
+    %2824 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3503)
+    %2825 = "ttir.reshape"(%2823, %2824) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3503)
+    %2826 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3504)
+    %2827 = "ttir.transpose"(%2825, %2826) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3504)
+    %2828 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3505)
+    %2829 = "ttir.transpose"(%2827, %2828) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3505)
+    %2830 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3506)
+    %2831 = "ttir.squeeze"(%2829, %2830) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3506)
+    %2832 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3507)
+    %2833 = "ttir.transpose"(%2831, %2832) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3507)
+    %2834 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3508)
+    %2835 = "ttir.matmul"(%2821, %2833, %2834) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3508)
+    %2836 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3509)
+    %2837 = "ttir.unsqueeze"(%2835, %2836) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3509)
+    %2838 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3510)
+    %2839 = "ttir.transpose"(%2837, %2838) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3510)
+    %2840 = tensor.empty() : tensor<12x3200xf32> loc(#loc3511)
+    %2841 = "ttir.reshape"(%2839, %2840) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3511)
+    %2842 = tensor.empty() : tensor<12x3200xf32> loc(#loc3512)
+    %2843 = "ttir.matmul"(%2841, %arg447, %2842) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3512)
+    %2844 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3513)
+    %2845 = "ttir.unsqueeze"(%2843, %2844) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3513)
+    %2846 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3514)
+    %2847 = "ttir.add"(%2721, %2845, %2846) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3514)
+    %2848 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3515)
+    %2849 = "ttir.multiply"(%2847, %2847, %2848) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3515)
+    %2850 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3516)
+    %2851 = "ttir.mean"(%2849, %2850) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3516)
+    %2852 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3517)
+    %2853 = "ttir.add"(%2851, %arg198, %2852) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3517)
+    %2854 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3518)
+    %2855 = "ttir.sqrt"(%2853, %2854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3518)
+    %2856 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3519)
+    %2857 = "ttir.reciprocal"(%2855, %2856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3519)
+    %2858 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3520)
+    %2859 = "ttir.multiply"(%2847, %2857, %2858) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3520)
+    %2860 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3521)
+    %2861 = "ttir.multiply"(%arg448, %2859, %2860) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3521)
+    %2862 = tensor.empty() : tensor<12x3200xf32> loc(#loc3522)
+    %2863 = "ttir.squeeze"(%2861, %2862) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3522)
+    %2864 = tensor.empty() : tensor<12x8640xf32> loc(#loc3523)
+    %2865 = "ttir.matmul"(%2863, %arg449, %2864) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3523)
+    %2866 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3524)
+    %2867 = "ttir.unsqueeze"(%2865, %2866) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3524)
+    %2868 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3525)
+    %2869 = "ttir.sigmoid"(%2867, %2868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3525)
+    %2870 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3526)
+    %2871 = "ttir.multiply"(%2867, %2869, %2870) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3526)
+    %2872 = tensor.empty() : tensor<12x8640xf32> loc(#loc3527)
+    %2873 = "ttir.matmul"(%2863, %arg450, %2872) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3527)
+    %2874 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3528)
+    %2875 = "ttir.unsqueeze"(%2873, %2874) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3528)
+    %2876 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3529)
+    %2877 = "ttir.multiply"(%2871, %2875, %2876) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3529)
+    %2878 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3530)
+    %2879 = "ttir.matmul"(%2877, %arg451, %2878) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3530)
+    %2880 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3531)
+    %2881 = "ttir.add"(%2847, %2879, %2880) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3531)
+    %2882 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3532)
+    %2883 = "ttir.multiply"(%2881, %2881, %2882) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3532)
+    %2884 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3533)
+    %2885 = "ttir.mean"(%2883, %2884) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3533)
+    %2886 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3534)
+    %2887 = "ttir.add"(%2885, %arg199, %2886) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3534)
+    %2888 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3535)
+    %2889 = "ttir.sqrt"(%2887, %2888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3535)
+    %2890 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3536)
+    %2891 = "ttir.reciprocal"(%2889, %2890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3536)
+    %2892 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3537)
+    %2893 = "ttir.multiply"(%2881, %2891, %2892) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3537)
+    %2894 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3538)
+    %2895 = "ttir.multiply"(%arg452, %2893, %2894) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3538)
+    %2896 = tensor.empty() : tensor<12x3200xf32> loc(#loc3539)
+    %2897 = "ttir.squeeze"(%2895, %2896) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3539)
+    %2898 = tensor.empty() : tensor<12x3200xf32> loc(#loc3540)
+    %2899 = "ttir.matmul"(%2897, %arg453, %2898) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3540)
+    %2900 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3541)
+    %2901 = "ttir.reshape"(%2899, %2900) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3541)
+    %2902 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3542)
+    %2903 = "ttir.transpose"(%2901, %2902) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3542)
+    %2904 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3543)
+    %2905 = "ttir.concat"(%arg200, %arg200, %2904) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3543)
+    %2906 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3544)
+    %2907 = "ttir.sin"(%2905, %2906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3544)
+    %2908 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3545)
+    %2909 = "ttir.unsqueeze"(%2907, %2908) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3545)
+    %2910 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3546)
+    %2911 = "ttir.multiply"(%2903, %2909, %2910) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3546)
+    %2912 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3547)
+    %2913 = "ttir.transpose"(%2903, %2912) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3547)
+    %2914 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3548)
+    %2915 = "ttir.matmul"(%arg201, %2913, %2914) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3548)
+    %2916 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3549)
+    %2917 = "ttir.transpose"(%2915, %2916) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3549)
+    %2918 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3550)
+    %2919 = "ttir.multiply"(%2917, %arg202, %2918) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3550)
+    %2920 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3551)
+    %2921 = "ttir.transpose"(%2903, %2920) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3551)
+    %2922 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3552)
+    %2923 = "ttir.matmul"(%arg203, %2921, %2922) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3552)
+    %2924 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3553)
+    %2925 = "ttir.transpose"(%2923, %2924) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3553)
+    %2926 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3554)
+    %2927 = "ttir.concat"(%2919, %2925, %2926) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3554)
+    %2928 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3555)
+    %2929 = "ttir.cos"(%2905, %2928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3555)
+    %2930 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3556)
+    %2931 = "ttir.unsqueeze"(%2929, %2930) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3556)
+    %2932 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3557)
+    %2933 = "ttir.multiply"(%2927, %2931, %2932) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3557)
+    %2934 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3558)
+    %2935 = "ttir.add"(%2911, %2933, %2934) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3558)
+    %2936 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3559)
+    %2937 = "ttir.squeeze"(%2935, %2936) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3559)
+    %2938 = tensor.empty() : tensor<12x3200xf32> loc(#loc3560)
+    %2939 = "ttir.matmul"(%2897, %arg454, %2938) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3560)
+    %2940 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3561)
+    %2941 = "ttir.reshape"(%2939, %2940) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3561)
+    %2942 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3562)
+    %2943 = "ttir.transpose"(%2941, %2942) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3562)
+    %2944 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3563)
+    %2945 = "ttir.multiply"(%2943, %2909, %2944) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3563)
+    %2946 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3564)
+    %2947 = "ttir.transpose"(%2943, %2946) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3564)
+    %2948 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3565)
+    %2949 = "ttir.matmul"(%arg204, %2947, %2948) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3565)
+    %2950 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3566)
+    %2951 = "ttir.transpose"(%2949, %2950) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3566)
+    %2952 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3567)
+    %2953 = "ttir.multiply"(%2951, %arg205, %2952) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3567)
+    %2954 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3568)
+    %2955 = "ttir.transpose"(%2943, %2954) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3568)
+    %2956 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3569)
+    %2957 = "ttir.matmul"(%arg206, %2955, %2956) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3569)
+    %2958 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3570)
+    %2959 = "ttir.transpose"(%2957, %2958) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3570)
+    %2960 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3571)
+    %2961 = "ttir.concat"(%2953, %2959, %2960) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3571)
+    %2962 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3572)
+    %2963 = "ttir.multiply"(%2961, %2931, %2962) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3572)
+    %2964 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3573)
+    %2965 = "ttir.add"(%2945, %2963, %2964) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3573)
+    %2966 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3574)
+    %2967 = "ttir.squeeze"(%2965, %2966) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3574)
+    %2968 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3575)
+    %2969 = "ttir.transpose"(%2967, %2968) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3575)
+    %2970 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3576)
+    %2971 = "ttir.matmul"(%2937, %2969, %2970) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3576)
+    %2972 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3577)
+    %2973 = "ttir.unsqueeze"(%2971, %2972) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3577)
+    %2974 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3578)
+    %2975 = "ttir.multiply"(%2973, %arg207, %2974) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3578)
+    %2976 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3579)
+    %2977 = "ttir.add"(%2975, %arg208, %2976) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3579)
+    %2978 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3580)
+    %2979 = "ttir.softmax"(%2977, %2978) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3580)
+    %2980 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3581)
+    %2981 = "ttir.squeeze"(%2979, %2980) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3581)
+    %2982 = tensor.empty() : tensor<12x3200xf32> loc(#loc3582)
+    %2983 = "ttir.matmul"(%2897, %arg455, %2982) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3582)
+    %2984 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3583)
+    %2985 = "ttir.reshape"(%2983, %2984) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3583)
+    %2986 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3584)
+    %2987 = "ttir.transpose"(%2985, %2986) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3584)
+    %2988 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3585)
+    %2989 = "ttir.transpose"(%2987, %2988) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3585)
+    %2990 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3586)
+    %2991 = "ttir.squeeze"(%2989, %2990) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3586)
+    %2992 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3587)
+    %2993 = "ttir.transpose"(%2991, %2992) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3587)
+    %2994 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3588)
+    %2995 = "ttir.matmul"(%2981, %2993, %2994) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3588)
+    %2996 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3589)
+    %2997 = "ttir.unsqueeze"(%2995, %2996) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3589)
+    %2998 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3590)
+    %2999 = "ttir.transpose"(%2997, %2998) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3590)
+    %3000 = tensor.empty() : tensor<12x3200xf32> loc(#loc3591)
+    %3001 = "ttir.reshape"(%2999, %3000) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3591)
+    %3002 = tensor.empty() : tensor<12x3200xf32> loc(#loc3592)
+    %3003 = "ttir.matmul"(%3001, %arg456, %3002) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3592)
+    %3004 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3593)
+    %3005 = "ttir.unsqueeze"(%3003, %3004) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3593)
+    %3006 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3594)
+    %3007 = "ttir.add"(%2881, %3005, %3006) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3594)
+    %3008 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3595)
+    %3009 = "ttir.multiply"(%3007, %3007, %3008) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3595)
+    %3010 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3596)
+    %3011 = "ttir.mean"(%3009, %3010) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3596)
+    %3012 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3597)
+    %3013 = "ttir.add"(%3011, %arg209, %3012) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3597)
+    %3014 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3598)
+    %3015 = "ttir.sqrt"(%3013, %3014) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3598)
+    %3016 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3599)
+    %3017 = "ttir.reciprocal"(%3015, %3016) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3599)
+    %3018 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3600)
+    %3019 = "ttir.multiply"(%3007, %3017, %3018) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3600)
+    %3020 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3601)
+    %3021 = "ttir.multiply"(%arg457, %3019, %3020) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3601)
+    %3022 = tensor.empty() : tensor<12x3200xf32> loc(#loc3602)
+    %3023 = "ttir.squeeze"(%3021, %3022) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3602)
+    %3024 = tensor.empty() : tensor<12x8640xf32> loc(#loc3603)
+    %3025 = "ttir.matmul"(%3023, %arg458, %3024) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3603)
+    %3026 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3604)
+    %3027 = "ttir.unsqueeze"(%3025, %3026) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3604)
+    %3028 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3605)
+    %3029 = "ttir.sigmoid"(%3027, %3028) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3605)
+    %3030 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3606)
+    %3031 = "ttir.multiply"(%3027, %3029, %3030) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3606)
+    %3032 = tensor.empty() : tensor<12x8640xf32> loc(#loc3607)
+    %3033 = "ttir.matmul"(%3023, %arg459, %3032) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3607)
+    %3034 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3608)
+    %3035 = "ttir.unsqueeze"(%3033, %3034) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3608)
+    %3036 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3609)
+    %3037 = "ttir.multiply"(%3031, %3035, %3036) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3609)
+    %3038 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3610)
+    %3039 = "ttir.matmul"(%3037, %arg460, %3038) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3610)
+    %3040 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3611)
+    %3041 = "ttir.add"(%3007, %3039, %3040) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3611)
+    %3042 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3612)
+    %3043 = "ttir.multiply"(%3041, %3041, %3042) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3612)
+    %3044 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3613)
+    %3045 = "ttir.mean"(%3043, %3044) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3613)
+    %3046 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3614)
+    %3047 = "ttir.add"(%3045, %arg210, %3046) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3614)
+    %3048 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3615)
+    %3049 = "ttir.sqrt"(%3047, %3048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3615)
+    %3050 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3616)
+    %3051 = "ttir.reciprocal"(%3049, %3050) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3616)
+    %3052 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3617)
+    %3053 = "ttir.multiply"(%3041, %3051, %3052) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3617)
+    %3054 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3618)
+    %3055 = "ttir.multiply"(%arg461, %3053, %3054) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3618)
+    %3056 = tensor.empty() : tensor<12x3200xf32> loc(#loc3619)
+    %3057 = "ttir.squeeze"(%3055, %3056) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3619)
+    %3058 = tensor.empty() : tensor<12x3200xf32> loc(#loc3620)
+    %3059 = "ttir.matmul"(%3057, %arg462, %3058) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3620)
+    %3060 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3621)
+    %3061 = "ttir.reshape"(%3059, %3060) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3621)
+    %3062 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3622)
+    %3063 = "ttir.transpose"(%3061, %3062) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3622)
+    %3064 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3623)
+    %3065 = "ttir.concat"(%arg211, %arg211, %3064) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3623)
+    %3066 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3624)
+    %3067 = "ttir.sin"(%3065, %3066) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3624)
+    %3068 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3625)
+    %3069 = "ttir.unsqueeze"(%3067, %3068) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3625)
+    %3070 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3626)
+    %3071 = "ttir.multiply"(%3063, %3069, %3070) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3626)
+    %3072 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3627)
+    %3073 = "ttir.transpose"(%3063, %3072) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3627)
+    %3074 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3628)
+    %3075 = "ttir.matmul"(%arg212, %3073, %3074) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3628)
+    %3076 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3629)
+    %3077 = "ttir.transpose"(%3075, %3076) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3629)
+    %3078 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3630)
+    %3079 = "ttir.multiply"(%3077, %arg213, %3078) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3630)
+    %3080 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3631)
+    %3081 = "ttir.transpose"(%3063, %3080) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3631)
+    %3082 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3632)
+    %3083 = "ttir.matmul"(%arg214, %3081, %3082) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3632)
+    %3084 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3633)
+    %3085 = "ttir.transpose"(%3083, %3084) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3633)
+    %3086 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3634)
+    %3087 = "ttir.concat"(%3079, %3085, %3086) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3634)
+    %3088 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3635)
+    %3089 = "ttir.cos"(%3065, %3088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3635)
+    %3090 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3636)
+    %3091 = "ttir.unsqueeze"(%3089, %3090) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3636)
+    %3092 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3637)
+    %3093 = "ttir.multiply"(%3087, %3091, %3092) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3637)
+    %3094 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3638)
+    %3095 = "ttir.add"(%3071, %3093, %3094) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3638)
+    %3096 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3639)
+    %3097 = "ttir.squeeze"(%3095, %3096) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3639)
+    %3098 = tensor.empty() : tensor<12x3200xf32> loc(#loc3640)
+    %3099 = "ttir.matmul"(%3057, %arg463, %3098) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3640)
+    %3100 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3641)
+    %3101 = "ttir.reshape"(%3099, %3100) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3641)
+    %3102 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3642)
+    %3103 = "ttir.transpose"(%3101, %3102) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3642)
+    %3104 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3643)
+    %3105 = "ttir.multiply"(%3103, %3069, %3104) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3643)
+    %3106 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3644)
+    %3107 = "ttir.transpose"(%3103, %3106) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3644)
+    %3108 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3645)
+    %3109 = "ttir.matmul"(%arg215, %3107, %3108) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3645)
+    %3110 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3646)
+    %3111 = "ttir.transpose"(%3109, %3110) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3646)
+    %3112 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3647)
+    %3113 = "ttir.multiply"(%3111, %arg216, %3112) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3647)
+    %3114 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3648)
+    %3115 = "ttir.transpose"(%3103, %3114) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3648)
+    %3116 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3649)
+    %3117 = "ttir.matmul"(%arg217, %3115, %3116) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3649)
+    %3118 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3650)
+    %3119 = "ttir.transpose"(%3117, %3118) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3650)
+    %3120 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3651)
+    %3121 = "ttir.concat"(%3113, %3119, %3120) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3651)
+    %3122 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3652)
+    %3123 = "ttir.multiply"(%3121, %3091, %3122) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3652)
+    %3124 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3653)
+    %3125 = "ttir.add"(%3105, %3123, %3124) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3653)
+    %3126 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3654)
+    %3127 = "ttir.squeeze"(%3125, %3126) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3654)
+    %3128 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3655)
+    %3129 = "ttir.transpose"(%3127, %3128) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3655)
+    %3130 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3656)
+    %3131 = "ttir.matmul"(%3097, %3129, %3130) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3656)
+    %3132 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3657)
+    %3133 = "ttir.unsqueeze"(%3131, %3132) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3657)
+    %3134 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3658)
+    %3135 = "ttir.multiply"(%3133, %arg218, %3134) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3658)
+    %3136 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3659)
+    %3137 = "ttir.add"(%3135, %arg219, %3136) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3659)
+    %3138 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3660)
+    %3139 = "ttir.softmax"(%3137, %3138) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3660)
+    %3140 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3661)
+    %3141 = "ttir.squeeze"(%3139, %3140) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3661)
+    %3142 = tensor.empty() : tensor<12x3200xf32> loc(#loc3662)
+    %3143 = "ttir.matmul"(%3057, %arg464, %3142) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3662)
+    %3144 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3663)
+    %3145 = "ttir.reshape"(%3143, %3144) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3663)
+    %3146 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3664)
+    %3147 = "ttir.transpose"(%3145, %3146) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3664)
+    %3148 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3665)
+    %3149 = "ttir.transpose"(%3147, %3148) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3665)
+    %3150 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3666)
+    %3151 = "ttir.squeeze"(%3149, %3150) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3666)
+    %3152 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3667)
+    %3153 = "ttir.transpose"(%3151, %3152) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3667)
+    %3154 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3668)
+    %3155 = "ttir.matmul"(%3141, %3153, %3154) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3668)
+    %3156 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3669)
+    %3157 = "ttir.unsqueeze"(%3155, %3156) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3669)
+    %3158 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3670)
+    %3159 = "ttir.transpose"(%3157, %3158) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3670)
+    %3160 = tensor.empty() : tensor<12x3200xf32> loc(#loc3671)
+    %3161 = "ttir.reshape"(%3159, %3160) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3671)
+    %3162 = tensor.empty() : tensor<12x3200xf32> loc(#loc3672)
+    %3163 = "ttir.matmul"(%3161, %arg465, %3162) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3672)
+    %3164 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3673)
+    %3165 = "ttir.unsqueeze"(%3163, %3164) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3673)
+    %3166 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3674)
+    %3167 = "ttir.add"(%3041, %3165, %3166) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3674)
+    %3168 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3675)
+    %3169 = "ttir.multiply"(%3167, %3167, %3168) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3675)
+    %3170 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3676)
+    %3171 = "ttir.mean"(%3169, %3170) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3676)
+    %3172 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3677)
+    %3173 = "ttir.add"(%3171, %arg220, %3172) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3677)
+    %3174 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3678)
+    %3175 = "ttir.sqrt"(%3173, %3174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3678)
+    %3176 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3679)
+    %3177 = "ttir.reciprocal"(%3175, %3176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3679)
+    %3178 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3680)
+    %3179 = "ttir.multiply"(%3167, %3177, %3178) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3680)
+    %3180 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3681)
+    %3181 = "ttir.multiply"(%arg466, %3179, %3180) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3681)
+    %3182 = tensor.empty() : tensor<12x3200xf32> loc(#loc3682)
+    %3183 = "ttir.squeeze"(%3181, %3182) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3682)
+    %3184 = tensor.empty() : tensor<12x8640xf32> loc(#loc3683)
+    %3185 = "ttir.matmul"(%3183, %arg467, %3184) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3683)
+    %3186 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3684)
+    %3187 = "ttir.unsqueeze"(%3185, %3186) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3684)
+    %3188 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3685)
+    %3189 = "ttir.sigmoid"(%3187, %3188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3685)
+    %3190 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3686)
+    %3191 = "ttir.multiply"(%3187, %3189, %3190) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3686)
+    %3192 = tensor.empty() : tensor<12x8640xf32> loc(#loc3687)
+    %3193 = "ttir.matmul"(%3183, %arg468, %3192) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3687)
+    %3194 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3688)
+    %3195 = "ttir.unsqueeze"(%3193, %3194) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3688)
+    %3196 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3689)
+    %3197 = "ttir.multiply"(%3191, %3195, %3196) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3689)
+    %3198 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3690)
+    %3199 = "ttir.matmul"(%3197, %arg469, %3198) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3690)
+    %3200 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3691)
+    %3201 = "ttir.add"(%3167, %3199, %3200) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3691)
+    %3202 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3692)
+    %3203 = "ttir.multiply"(%3201, %3201, %3202) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3692)
+    %3204 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3693)
+    %3205 = "ttir.mean"(%3203, %3204) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3693)
+    %3206 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3694)
+    %3207 = "ttir.add"(%3205, %arg221, %3206) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3694)
+    %3208 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3695)
+    %3209 = "ttir.sqrt"(%3207, %3208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3695)
+    %3210 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3696)
+    %3211 = "ttir.reciprocal"(%3209, %3210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3696)
+    %3212 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3697)
+    %3213 = "ttir.multiply"(%3201, %3211, %3212) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3697)
+    %3214 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3698)
+    %3215 = "ttir.multiply"(%arg470, %3213, %3214) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3698)
+    %3216 = tensor.empty() : tensor<12x3200xf32> loc(#loc3699)
+    %3217 = "ttir.squeeze"(%3215, %3216) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3699)
+    %3218 = tensor.empty() : tensor<12x3200xf32> loc(#loc3700)
+    %3219 = "ttir.matmul"(%3217, %arg471, %3218) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3700)
+    %3220 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3701)
+    %3221 = "ttir.reshape"(%3219, %3220) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3701)
+    %3222 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3702)
+    %3223 = "ttir.transpose"(%3221, %3222) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3702)
+    %3224 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3703)
+    %3225 = "ttir.concat"(%arg222, %arg222, %3224) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3703)
+    %3226 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3704)
+    %3227 = "ttir.sin"(%3225, %3226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3704)
+    %3228 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3705)
+    %3229 = "ttir.unsqueeze"(%3227, %3228) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3705)
+    %3230 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3706)
+    %3231 = "ttir.multiply"(%3223, %3229, %3230) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3706)
+    %3232 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3707)
+    %3233 = "ttir.transpose"(%3223, %3232) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3707)
+    %3234 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3708)
+    %3235 = "ttir.matmul"(%arg223, %3233, %3234) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3708)
+    %3236 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3709)
+    %3237 = "ttir.transpose"(%3235, %3236) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3709)
+    %3238 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3710)
+    %3239 = "ttir.multiply"(%3237, %arg224, %3238) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3710)
+    %3240 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3711)
+    %3241 = "ttir.transpose"(%3223, %3240) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3711)
+    %3242 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3712)
+    %3243 = "ttir.matmul"(%arg225, %3241, %3242) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3712)
+    %3244 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3713)
+    %3245 = "ttir.transpose"(%3243, %3244) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3713)
+    %3246 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3714)
+    %3247 = "ttir.concat"(%3239, %3245, %3246) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3714)
+    %3248 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3715)
+    %3249 = "ttir.cos"(%3225, %3248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3715)
+    %3250 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3716)
+    %3251 = "ttir.unsqueeze"(%3249, %3250) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3716)
+    %3252 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3717)
+    %3253 = "ttir.multiply"(%3247, %3251, %3252) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3717)
+    %3254 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3718)
+    %3255 = "ttir.add"(%3231, %3253, %3254) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3718)
+    %3256 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3719)
+    %3257 = "ttir.squeeze"(%3255, %3256) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3719)
+    %3258 = tensor.empty() : tensor<12x3200xf32> loc(#loc3720)
+    %3259 = "ttir.matmul"(%3217, %arg472, %3258) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3720)
+    %3260 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3721)
+    %3261 = "ttir.reshape"(%3259, %3260) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3721)
+    %3262 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3722)
+    %3263 = "ttir.transpose"(%3261, %3262) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3722)
+    %3264 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3723)
+    %3265 = "ttir.multiply"(%3263, %3229, %3264) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3723)
+    %3266 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3724)
+    %3267 = "ttir.transpose"(%3263, %3266) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3724)
+    %3268 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3725)
+    %3269 = "ttir.matmul"(%arg226, %3267, %3268) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3725)
+    %3270 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3726)
+    %3271 = "ttir.transpose"(%3269, %3270) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3726)
+    %3272 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3727)
+    %3273 = "ttir.multiply"(%3271, %arg227, %3272) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3727)
+    %3274 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3728)
+    %3275 = "ttir.transpose"(%3263, %3274) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3728)
+    %3276 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3729)
+    %3277 = "ttir.matmul"(%arg228, %3275, %3276) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3729)
+    %3278 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3730)
+    %3279 = "ttir.transpose"(%3277, %3278) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3730)
+    %3280 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3731)
+    %3281 = "ttir.concat"(%3273, %3279, %3280) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3731)
+    %3282 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3732)
+    %3283 = "ttir.multiply"(%3281, %3251, %3282) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3732)
+    %3284 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3733)
+    %3285 = "ttir.add"(%3265, %3283, %3284) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3733)
+    %3286 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3734)
+    %3287 = "ttir.squeeze"(%3285, %3286) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3734)
+    %3288 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3735)
+    %3289 = "ttir.transpose"(%3287, %3288) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3735)
+    %3290 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3736)
+    %3291 = "ttir.matmul"(%3257, %3289, %3290) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3736)
+    %3292 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3737)
+    %3293 = "ttir.unsqueeze"(%3291, %3292) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3737)
+    %3294 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3738)
+    %3295 = "ttir.multiply"(%3293, %arg229, %3294) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3738)
+    %3296 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3739)
+    %3297 = "ttir.add"(%3295, %arg230, %3296) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3739)
+    %3298 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3740)
+    %3299 = "ttir.softmax"(%3297, %3298) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3740)
+    %3300 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3741)
+    %3301 = "ttir.squeeze"(%3299, %3300) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3741)
+    %3302 = tensor.empty() : tensor<12x3200xf32> loc(#loc3742)
+    %3303 = "ttir.matmul"(%3217, %arg473, %3302) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3742)
+    %3304 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3743)
+    %3305 = "ttir.reshape"(%3303, %3304) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3743)
+    %3306 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3744)
+    %3307 = "ttir.transpose"(%3305, %3306) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3744)
+    %3308 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3745)
+    %3309 = "ttir.transpose"(%3307, %3308) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3745)
+    %3310 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3746)
+    %3311 = "ttir.squeeze"(%3309, %3310) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3746)
+    %3312 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3747)
+    %3313 = "ttir.transpose"(%3311, %3312) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3747)
+    %3314 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3748)
+    %3315 = "ttir.matmul"(%3301, %3313, %3314) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3748)
+    %3316 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3749)
+    %3317 = "ttir.unsqueeze"(%3315, %3316) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3749)
+    %3318 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3750)
+    %3319 = "ttir.transpose"(%3317, %3318) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3750)
+    %3320 = tensor.empty() : tensor<12x3200xf32> loc(#loc3751)
+    %3321 = "ttir.reshape"(%3319, %3320) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3751)
+    %3322 = tensor.empty() : tensor<12x3200xf32> loc(#loc3752)
+    %3323 = "ttir.matmul"(%3321, %arg474, %3322) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3752)
+    %3324 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3753)
+    %3325 = "ttir.unsqueeze"(%3323, %3324) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3753)
+    %3326 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3754)
+    %3327 = "ttir.add"(%3201, %3325, %3326) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3754)
+    %3328 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3755)
+    %3329 = "ttir.multiply"(%3327, %3327, %3328) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3755)
+    %3330 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3756)
+    %3331 = "ttir.mean"(%3329, %3330) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3756)
+    %3332 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3757)
+    %3333 = "ttir.add"(%3331, %arg231, %3332) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3757)
+    %3334 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3758)
+    %3335 = "ttir.sqrt"(%3333, %3334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3758)
+    %3336 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3759)
+    %3337 = "ttir.reciprocal"(%3335, %3336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3759)
+    %3338 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3760)
+    %3339 = "ttir.multiply"(%3327, %3337, %3338) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3760)
+    %3340 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3761)
+    %3341 = "ttir.multiply"(%arg475, %3339, %3340) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3761)
+    %3342 = tensor.empty() : tensor<12x3200xf32> loc(#loc3762)
+    %3343 = "ttir.squeeze"(%3341, %3342) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3762)
+    %3344 = tensor.empty() : tensor<12x8640xf32> loc(#loc3763)
+    %3345 = "ttir.matmul"(%3343, %arg476, %3344) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3763)
+    %3346 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3764)
+    %3347 = "ttir.unsqueeze"(%3345, %3346) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3764)
+    %3348 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3765)
+    %3349 = "ttir.sigmoid"(%3347, %3348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3765)
+    %3350 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3766)
+    %3351 = "ttir.multiply"(%3347, %3349, %3350) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3766)
+    %3352 = tensor.empty() : tensor<12x8640xf32> loc(#loc3767)
+    %3353 = "ttir.matmul"(%3343, %arg477, %3352) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3767)
+    %3354 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3768)
+    %3355 = "ttir.unsqueeze"(%3353, %3354) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3768)
+    %3356 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3769)
+    %3357 = "ttir.multiply"(%3351, %3355, %3356) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3769)
+    %3358 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3770)
+    %3359 = "ttir.matmul"(%3357, %arg478, %3358) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3770)
+    %3360 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3771)
+    %3361 = "ttir.add"(%3327, %3359, %3360) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3771)
+    %3362 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3772)
+    %3363 = "ttir.multiply"(%3361, %3361, %3362) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3772)
+    %3364 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3773)
+    %3365 = "ttir.mean"(%3363, %3364) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3773)
+    %3366 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3774)
+    %3367 = "ttir.add"(%3365, %arg232, %3366) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3774)
+    %3368 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3775)
+    %3369 = "ttir.sqrt"(%3367, %3368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3775)
+    %3370 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3776)
+    %3371 = "ttir.reciprocal"(%3369, %3370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3776)
+    %3372 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3777)
+    %3373 = "ttir.multiply"(%3361, %3371, %3372) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3777)
+    %3374 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3778)
+    %3375 = "ttir.multiply"(%arg479, %3373, %3374) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3778)
+    %3376 = tensor.empty() : tensor<12x3200xf32> loc(#loc3779)
+    %3377 = "ttir.squeeze"(%3375, %3376) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3779)
+    %3378 = tensor.empty() : tensor<12x3200xf32> loc(#loc3780)
+    %3379 = "ttir.matmul"(%3377, %arg480, %3378) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3780)
+    %3380 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3781)
+    %3381 = "ttir.reshape"(%3379, %3380) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3781)
+    %3382 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3782)
+    %3383 = "ttir.transpose"(%3381, %3382) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3782)
+    %3384 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3783)
+    %3385 = "ttir.concat"(%arg233, %arg233, %3384) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3783)
+    %3386 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3784)
+    %3387 = "ttir.sin"(%3385, %3386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3784)
+    %3388 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3785)
+    %3389 = "ttir.unsqueeze"(%3387, %3388) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3785)
+    %3390 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3786)
+    %3391 = "ttir.multiply"(%3383, %3389, %3390) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3786)
+    %3392 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3787)
+    %3393 = "ttir.transpose"(%3383, %3392) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3787)
+    %3394 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3788)
+    %3395 = "ttir.matmul"(%arg234, %3393, %3394) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3788)
+    %3396 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3789)
+    %3397 = "ttir.transpose"(%3395, %3396) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3789)
+    %3398 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3790)
+    %3399 = "ttir.multiply"(%3397, %arg235, %3398) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3790)
+    %3400 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3791)
+    %3401 = "ttir.transpose"(%3383, %3400) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3791)
+    %3402 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3792)
+    %3403 = "ttir.matmul"(%arg236, %3401, %3402) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3792)
+    %3404 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3793)
+    %3405 = "ttir.transpose"(%3403, %3404) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3793)
+    %3406 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3794)
+    %3407 = "ttir.concat"(%3399, %3405, %3406) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3794)
+    %3408 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3795)
+    %3409 = "ttir.cos"(%3385, %3408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3795)
+    %3410 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3796)
+    %3411 = "ttir.unsqueeze"(%3409, %3410) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3796)
+    %3412 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3797)
+    %3413 = "ttir.multiply"(%3407, %3411, %3412) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3797)
+    %3414 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3798)
+    %3415 = "ttir.add"(%3391, %3413, %3414) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3798)
+    %3416 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3799)
+    %3417 = "ttir.squeeze"(%3415, %3416) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3799)
+    %3418 = tensor.empty() : tensor<12x3200xf32> loc(#loc3800)
+    %3419 = "ttir.matmul"(%3377, %arg481, %3418) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3800)
+    %3420 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3801)
+    %3421 = "ttir.reshape"(%3419, %3420) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3801)
+    %3422 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3802)
+    %3423 = "ttir.transpose"(%3421, %3422) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3802)
+    %3424 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3803)
+    %3425 = "ttir.multiply"(%3423, %3389, %3424) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3803)
+    %3426 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3804)
+    %3427 = "ttir.transpose"(%3423, %3426) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3804)
+    %3428 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3805)
+    %3429 = "ttir.matmul"(%arg237, %3427, %3428) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3805)
+    %3430 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3806)
+    %3431 = "ttir.transpose"(%3429, %3430) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3806)
+    %3432 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3807)
+    %3433 = "ttir.multiply"(%3431, %arg238, %3432) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3807)
+    %3434 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3808)
+    %3435 = "ttir.transpose"(%3423, %3434) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3808)
+    %3436 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3809)
+    %3437 = "ttir.matmul"(%arg239, %3435, %3436) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3809)
+    %3438 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3810)
+    %3439 = "ttir.transpose"(%3437, %3438) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3810)
+    %3440 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3811)
+    %3441 = "ttir.concat"(%3433, %3439, %3440) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3811)
+    %3442 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3812)
+    %3443 = "ttir.multiply"(%3441, %3411, %3442) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3812)
+    %3444 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3813)
+    %3445 = "ttir.add"(%3425, %3443, %3444) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3813)
+    %3446 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3814)
+    %3447 = "ttir.squeeze"(%3445, %3446) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3814)
+    %3448 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3815)
+    %3449 = "ttir.transpose"(%3447, %3448) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3815)
+    %3450 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3816)
+    %3451 = "ttir.matmul"(%3417, %3449, %3450) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3816)
+    %3452 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3817)
+    %3453 = "ttir.unsqueeze"(%3451, %3452) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3817)
+    %3454 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3818)
+    %3455 = "ttir.multiply"(%3453, %arg240, %3454) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3818)
+    %3456 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3819)
+    %3457 = "ttir.add"(%3455, %arg241, %3456) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3819)
+    %3458 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3820)
+    %3459 = "ttir.softmax"(%3457, %3458) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3820)
+    %3460 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3821)
+    %3461 = "ttir.squeeze"(%3459, %3460) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3821)
+    %3462 = tensor.empty() : tensor<12x3200xf32> loc(#loc3822)
+    %3463 = "ttir.matmul"(%3377, %arg482, %3462) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3822)
+    %3464 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3823)
+    %3465 = "ttir.reshape"(%3463, %3464) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3823)
+    %3466 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3824)
+    %3467 = "ttir.transpose"(%3465, %3466) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3824)
+    %3468 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3825)
+    %3469 = "ttir.transpose"(%3467, %3468) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3825)
+    %3470 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3826)
+    %3471 = "ttir.squeeze"(%3469, %3470) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3826)
+    %3472 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3827)
+    %3473 = "ttir.transpose"(%3471, %3472) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3827)
+    %3474 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3828)
+    %3475 = "ttir.matmul"(%3461, %3473, %3474) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3828)
+    %3476 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3829)
+    %3477 = "ttir.unsqueeze"(%3475, %3476) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3829)
+    %3478 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3830)
+    %3479 = "ttir.transpose"(%3477, %3478) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3830)
+    %3480 = tensor.empty() : tensor<12x3200xf32> loc(#loc3831)
+    %3481 = "ttir.reshape"(%3479, %3480) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3831)
+    %3482 = tensor.empty() : tensor<12x3200xf32> loc(#loc3832)
+    %3483 = "ttir.matmul"(%3481, %arg483, %3482) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3832)
+    %3484 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3833)
+    %3485 = "ttir.unsqueeze"(%3483, %3484) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3833)
+    %3486 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3834)
+    %3487 = "ttir.add"(%3361, %3485, %3486) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3834)
+    %3488 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3835)
+    %3489 = "ttir.multiply"(%3487, %3487, %3488) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3835)
+    %3490 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3836)
+    %3491 = "ttir.mean"(%3489, %3490) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3836)
+    %3492 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3837)
+    %3493 = "ttir.add"(%3491, %arg242, %3492) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3837)
+    %3494 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3838)
+    %3495 = "ttir.sqrt"(%3493, %3494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3838)
+    %3496 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3839)
+    %3497 = "ttir.reciprocal"(%3495, %3496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3839)
+    %3498 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3840)
+    %3499 = "ttir.multiply"(%3487, %3497, %3498) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3840)
+    %3500 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3841)
+    %3501 = "ttir.multiply"(%arg484, %3499, %3500) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3841)
+    %3502 = tensor.empty() : tensor<12x3200xf32> loc(#loc3842)
+    %3503 = "ttir.squeeze"(%3501, %3502) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3842)
+    %3504 = tensor.empty() : tensor<12x8640xf32> loc(#loc3843)
+    %3505 = "ttir.matmul"(%3503, %arg485, %3504) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3843)
+    %3506 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3844)
+    %3507 = "ttir.unsqueeze"(%3505, %3506) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3844)
+    %3508 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3845)
+    %3509 = "ttir.sigmoid"(%3507, %3508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3845)
+    %3510 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3846)
+    %3511 = "ttir.multiply"(%3507, %3509, %3510) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3846)
+    %3512 = tensor.empty() : tensor<12x8640xf32> loc(#loc3847)
+    %3513 = "ttir.matmul"(%3503, %arg486, %3512) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3847)
+    %3514 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3848)
+    %3515 = "ttir.unsqueeze"(%3513, %3514) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3848)
+    %3516 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3849)
+    %3517 = "ttir.multiply"(%3511, %3515, %3516) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3849)
+    %3518 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3850)
+    %3519 = "ttir.matmul"(%3517, %arg487, %3518) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3850)
+    %3520 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3851)
+    %3521 = "ttir.add"(%3487, %3519, %3520) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3851)
+    %3522 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3852)
+    %3523 = "ttir.multiply"(%3521, %3521, %3522) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3852)
+    %3524 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3853)
+    %3525 = "ttir.mean"(%3523, %3524) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3853)
+    %3526 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3854)
+    %3527 = "ttir.add"(%3525, %arg243, %3526) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3854)
+    %3528 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3855)
+    %3529 = "ttir.sqrt"(%3527, %3528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3855)
+    %3530 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3856)
+    %3531 = "ttir.reciprocal"(%3529, %3530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3856)
+    %3532 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3857)
+    %3533 = "ttir.multiply"(%3521, %3531, %3532) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3857)
+    %3534 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3858)
+    %3535 = "ttir.multiply"(%arg488, %3533, %3534) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3858)
+    %3536 = tensor.empty() : tensor<12x3200xf32> loc(#loc3859)
+    %3537 = "ttir.squeeze"(%3535, %3536) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3859)
+    %3538 = tensor.empty() : tensor<12x3200xf32> loc(#loc3860)
+    %3539 = "ttir.matmul"(%3537, %arg489, %3538) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3860)
+    %3540 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3861)
+    %3541 = "ttir.reshape"(%3539, %3540) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3861)
+    %3542 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3862)
+    %3543 = "ttir.transpose"(%3541, %3542) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3862)
+    %3544 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3863)
+    %3545 = "ttir.concat"(%arg244, %arg244, %3544) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3863)
+    %3546 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3864)
+    %3547 = "ttir.sin"(%3545, %3546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3864)
+    %3548 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3865)
+    %3549 = "ttir.unsqueeze"(%3547, %3548) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3865)
+    %3550 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3866)
+    %3551 = "ttir.multiply"(%3543, %3549, %3550) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3866)
+    %3552 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3867)
+    %3553 = "ttir.transpose"(%3543, %3552) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3867)
+    %3554 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3868)
+    %3555 = "ttir.matmul"(%arg245, %3553, %3554) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3868)
+    %3556 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3869)
+    %3557 = "ttir.transpose"(%3555, %3556) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3869)
+    %3558 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3870)
+    %3559 = "ttir.multiply"(%3557, %arg246, %3558) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3870)
+    %3560 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3871)
+    %3561 = "ttir.transpose"(%3543, %3560) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3871)
+    %3562 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3872)
+    %3563 = "ttir.matmul"(%arg247, %3561, %3562) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3872)
+    %3564 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3873)
+    %3565 = "ttir.transpose"(%3563, %3564) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3873)
+    %3566 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3874)
+    %3567 = "ttir.concat"(%3559, %3565, %3566) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3874)
+    %3568 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3875)
+    %3569 = "ttir.cos"(%3545, %3568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3875)
+    %3570 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3876)
+    %3571 = "ttir.unsqueeze"(%3569, %3570) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3876)
+    %3572 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3877)
+    %3573 = "ttir.multiply"(%3567, %3571, %3572) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3877)
+    %3574 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3878)
+    %3575 = "ttir.add"(%3551, %3573, %3574) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3878)
+    %3576 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3879)
+    %3577 = "ttir.squeeze"(%3575, %3576) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3879)
+    %3578 = tensor.empty() : tensor<12x3200xf32> loc(#loc3880)
+    %3579 = "ttir.matmul"(%3537, %arg490, %3578) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3880)
+    %3580 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3881)
+    %3581 = "ttir.reshape"(%3579, %3580) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3881)
+    %3582 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3882)
+    %3583 = "ttir.transpose"(%3581, %3582) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3882)
+    %3584 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3883)
+    %3585 = "ttir.multiply"(%3583, %3549, %3584) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3883)
+    %3586 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3884)
+    %3587 = "ttir.transpose"(%3583, %3586) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3884)
+    %3588 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3885)
+    %3589 = "ttir.matmul"(%arg248, %3587, %3588) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3885)
+    %3590 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3886)
+    %3591 = "ttir.transpose"(%3589, %3590) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3886)
+    %3592 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3887)
+    %3593 = "ttir.multiply"(%3591, %arg249, %3592) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3887)
+    %3594 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3888)
+    %3595 = "ttir.transpose"(%3583, %3594) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3888)
+    %3596 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3889)
+    %3597 = "ttir.matmul"(%arg250, %3595, %3596) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3889)
+    %3598 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3890)
+    %3599 = "ttir.transpose"(%3597, %3598) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3890)
+    %3600 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3891)
+    %3601 = "ttir.concat"(%3593, %3599, %3600) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3891)
+    %3602 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3892)
+    %3603 = "ttir.multiply"(%3601, %3571, %3602) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3892)
+    %3604 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3893)
+    %3605 = "ttir.add"(%3585, %3603, %3604) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3893)
+    %3606 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3894)
+    %3607 = "ttir.squeeze"(%3605, %3606) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3894)
+    %3608 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3895)
+    %3609 = "ttir.transpose"(%3607, %3608) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3895)
+    %3610 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3896)
+    %3611 = "ttir.matmul"(%3577, %3609, %3610) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3896)
+    %3612 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3897)
+    %3613 = "ttir.unsqueeze"(%3611, %3612) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3897)
+    %3614 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3898)
+    %3615 = "ttir.multiply"(%3613, %arg251, %3614) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3898)
+    %3616 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3899)
+    %3617 = "ttir.add"(%3615, %arg252, %3616) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3899)
+    %3618 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3900)
+    %3619 = "ttir.softmax"(%3617, %3618) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3900)
+    %3620 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3901)
+    %3621 = "ttir.squeeze"(%3619, %3620) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3901)
+    %3622 = tensor.empty() : tensor<12x3200xf32> loc(#loc3902)
+    %3623 = "ttir.matmul"(%3537, %arg491, %3622) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3902)
+    %3624 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3903)
+    %3625 = "ttir.reshape"(%3623, %3624) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3903)
+    %3626 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3904)
+    %3627 = "ttir.transpose"(%3625, %3626) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3904)
+    %3628 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3905)
+    %3629 = "ttir.transpose"(%3627, %3628) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3905)
+    %3630 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3906)
+    %3631 = "ttir.squeeze"(%3629, %3630) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3906)
+    %3632 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3907)
+    %3633 = "ttir.transpose"(%3631, %3632) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3907)
+    %3634 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3908)
+    %3635 = "ttir.matmul"(%3621, %3633, %3634) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3908)
+    %3636 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3909)
+    %3637 = "ttir.unsqueeze"(%3635, %3636) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3909)
+    %3638 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3910)
+    %3639 = "ttir.transpose"(%3637, %3638) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3910)
+    %3640 = tensor.empty() : tensor<12x3200xf32> loc(#loc3911)
+    %3641 = "ttir.reshape"(%3639, %3640) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3911)
+    %3642 = tensor.empty() : tensor<12x3200xf32> loc(#loc3912)
+    %3643 = "ttir.matmul"(%3641, %arg492, %3642) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3912)
+    %3644 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3913)
+    %3645 = "ttir.unsqueeze"(%3643, %3644) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3913)
+    %3646 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3914)
+    %3647 = "ttir.add"(%3521, %3645, %3646) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3914)
+    %3648 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3915)
+    %3649 = "ttir.multiply"(%3647, %3647, %3648) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3915)
+    %3650 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3916)
+    %3651 = "ttir.mean"(%3649, %3650) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3916)
+    %3652 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3917)
+    %3653 = "ttir.add"(%3651, %arg253, %3652) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3917)
+    %3654 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3918)
+    %3655 = "ttir.sqrt"(%3653, %3654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3918)
+    %3656 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3919)
+    %3657 = "ttir.reciprocal"(%3655, %3656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3919)
+    %3658 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3920)
+    %3659 = "ttir.multiply"(%3647, %3657, %3658) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3920)
+    %3660 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3921)
+    %3661 = "ttir.multiply"(%arg493, %3659, %3660) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3921)
+    %3662 = tensor.empty() : tensor<12x3200xf32> loc(#loc3922)
+    %3663 = "ttir.squeeze"(%3661, %3662) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3922)
+    %3664 = tensor.empty() : tensor<12x8640xf32> loc(#loc3923)
+    %3665 = "ttir.matmul"(%3663, %arg494, %3664) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3923)
+    %3666 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3924)
+    %3667 = "ttir.unsqueeze"(%3665, %3666) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3924)
+    %3668 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3925)
+    %3669 = "ttir.sigmoid"(%3667, %3668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3925)
+    %3670 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3926)
+    %3671 = "ttir.multiply"(%3667, %3669, %3670) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3926)
+    %3672 = tensor.empty() : tensor<12x8640xf32> loc(#loc3927)
+    %3673 = "ttir.matmul"(%3663, %arg495, %3672) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc3927)
+    %3674 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3928)
+    %3675 = "ttir.unsqueeze"(%3673, %3674) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3928)
+    %3676 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc3929)
+    %3677 = "ttir.multiply"(%3671, %3675, %3676) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc3929)
+    %3678 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3930)
+    %3679 = "ttir.matmul"(%3677, %arg496, %3678) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3930)
+    %3680 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3931)
+    %3681 = "ttir.add"(%3647, %3679, %3680) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3931)
+    %3682 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3932)
+    %3683 = "ttir.multiply"(%3681, %3681, %3682) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3932)
+    %3684 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3933)
+    %3685 = "ttir.mean"(%3683, %3684) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3933)
+    %3686 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3934)
+    %3687 = "ttir.add"(%3685, %arg254, %3686) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3934)
+    %3688 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3935)
+    %3689 = "ttir.sqrt"(%3687, %3688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3935)
+    %3690 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3936)
+    %3691 = "ttir.reciprocal"(%3689, %3690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3936)
+    %3692 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3937)
+    %3693 = "ttir.multiply"(%3681, %3691, %3692) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3937)
+    %3694 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3938)
+    %3695 = "ttir.multiply"(%arg497, %3693, %3694) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3938)
+    %3696 = tensor.empty() : tensor<12x3200xf32> loc(#loc3939)
+    %3697 = "ttir.squeeze"(%3695, %3696) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3939)
+    %3698 = tensor.empty() : tensor<12x3200xf32> loc(#loc3940)
+    %3699 = "ttir.matmul"(%3697, %arg498, %3698) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3940)
+    %3700 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3941)
+    %3701 = "ttir.reshape"(%3699, %3700) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3941)
+    %3702 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3942)
+    %3703 = "ttir.transpose"(%3701, %3702) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3942)
+    %3704 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3943)
+    %3705 = "ttir.concat"(%arg255, %arg255, %3704) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3943)
+    %3706 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3944)
+    %3707 = "ttir.sin"(%3705, %3706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3944)
+    %3708 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3945)
+    %3709 = "ttir.unsqueeze"(%3707, %3708) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3945)
+    %3710 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3946)
+    %3711 = "ttir.multiply"(%3703, %3709, %3710) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3946)
+    %3712 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3947)
+    %3713 = "ttir.transpose"(%3703, %3712) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3947)
+    %3714 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3948)
+    %3715 = "ttir.matmul"(%arg256, %3713, %3714) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3948)
+    %3716 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3949)
+    %3717 = "ttir.transpose"(%3715, %3716) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3949)
+    %3718 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3950)
+    %3719 = "ttir.multiply"(%3717, %arg257, %3718) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3950)
+    %3720 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3951)
+    %3721 = "ttir.transpose"(%3703, %3720) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3951)
+    %3722 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3952)
+    %3723 = "ttir.matmul"(%arg258, %3721, %3722) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3952)
+    %3724 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3953)
+    %3725 = "ttir.transpose"(%3723, %3724) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3953)
+    %3726 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3954)
+    %3727 = "ttir.concat"(%3719, %3725, %3726) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3954)
+    %3728 = tensor.empty() : tensor<1x12x100xf32> loc(#loc3955)
+    %3729 = "ttir.cos"(%3705, %3728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc3955)
+    %3730 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc3956)
+    %3731 = "ttir.unsqueeze"(%3729, %3730) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc3956)
+    %3732 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3957)
+    %3733 = "ttir.multiply"(%3727, %3731, %3732) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3957)
+    %3734 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3958)
+    %3735 = "ttir.add"(%3711, %3733, %3734) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3958)
+    %3736 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3959)
+    %3737 = "ttir.squeeze"(%3735, %3736) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3959)
+    %3738 = tensor.empty() : tensor<12x3200xf32> loc(#loc3960)
+    %3739 = "ttir.matmul"(%3697, %arg499, %3738) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3960)
+    %3740 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3961)
+    %3741 = "ttir.reshape"(%3739, %3740) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3961)
+    %3742 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3962)
+    %3743 = "ttir.transpose"(%3741, %3742) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3962)
+    %3744 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3963)
+    %3745 = "ttir.multiply"(%3743, %3709, %3744) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3963)
+    %3746 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3964)
+    %3747 = "ttir.transpose"(%3743, %3746) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3964)
+    %3748 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3965)
+    %3749 = "ttir.matmul"(%arg259, %3747, %3748) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3965)
+    %3750 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3966)
+    %3751 = "ttir.transpose"(%3749, %3750) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3966)
+    %3752 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3967)
+    %3753 = "ttir.multiply"(%3751, %arg260, %3752) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3967)
+    %3754 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3968)
+    %3755 = "ttir.transpose"(%3743, %3754) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3968)
+    %3756 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc3969)
+    %3757 = "ttir.matmul"(%arg261, %3755, %3756) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc3969)
+    %3758 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc3970)
+    %3759 = "ttir.transpose"(%3757, %3758) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc3970)
+    %3760 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3971)
+    %3761 = "ttir.concat"(%3753, %3759, %3760) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3971)
+    %3762 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3972)
+    %3763 = "ttir.multiply"(%3761, %3731, %3762) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3972)
+    %3764 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3973)
+    %3765 = "ttir.add"(%3745, %3763, %3764) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3973)
+    %3766 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3974)
+    %3767 = "ttir.squeeze"(%3765, %3766) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3974)
+    %3768 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3975)
+    %3769 = "ttir.transpose"(%3767, %3768) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3975)
+    %3770 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3976)
+    %3771 = "ttir.matmul"(%3737, %3769, %3770) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3976)
+    %3772 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3977)
+    %3773 = "ttir.unsqueeze"(%3771, %3772) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3977)
+    %3774 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3978)
+    %3775 = "ttir.multiply"(%3773, %arg262, %3774) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3978)
+    %3776 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3979)
+    %3777 = "ttir.add"(%3775, %arg263, %3776) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3979)
+    %3778 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc3980)
+    %3779 = "ttir.softmax"(%3777, %3778) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc3980)
+    %3780 = tensor.empty() : tensor<32x12x12xf32> loc(#loc3981)
+    %3781 = "ttir.squeeze"(%3779, %3780) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc3981)
+    %3782 = tensor.empty() : tensor<12x3200xf32> loc(#loc3982)
+    %3783 = "ttir.matmul"(%3697, %arg500, %3782) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3982)
+    %3784 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3983)
+    %3785 = "ttir.reshape"(%3783, %3784) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3983)
+    %3786 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3984)
+    %3787 = "ttir.transpose"(%3785, %3786) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3984)
+    %3788 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc3985)
+    %3789 = "ttir.transpose"(%3787, %3788) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc3985)
+    %3790 = tensor.empty() : tensor<32x100x12xf32> loc(#loc3986)
+    %3791 = "ttir.squeeze"(%3789, %3790) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc3986)
+    %3792 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3987)
+    %3793 = "ttir.transpose"(%3791, %3792) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3987)
+    %3794 = tensor.empty() : tensor<32x12x100xf32> loc(#loc3988)
+    %3795 = "ttir.matmul"(%3781, %3793, %3794) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc3988)
+    %3796 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc3989)
+    %3797 = "ttir.unsqueeze"(%3795, %3796) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc3989)
+    %3798 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc3990)
+    %3799 = "ttir.transpose"(%3797, %3798) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc3990)
+    %3800 = tensor.empty() : tensor<12x3200xf32> loc(#loc3991)
+    %3801 = "ttir.reshape"(%3799, %3800) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3991)
+    %3802 = tensor.empty() : tensor<12x3200xf32> loc(#loc3992)
+    %3803 = "ttir.matmul"(%3801, %arg501, %3802) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc3992)
+    %3804 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3993)
+    %3805 = "ttir.unsqueeze"(%3803, %3804) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3993)
+    %3806 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3994)
+    %3807 = "ttir.add"(%3681, %3805, %3806) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3994)
+    %3808 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc3995)
+    %3809 = "ttir.multiply"(%3807, %3807, %3808) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc3995)
+    %3810 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3996)
+    %3811 = "ttir.mean"(%3809, %3810) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3996)
+    %3812 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3997)
+    %3813 = "ttir.add"(%3811, %arg264, %3812) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3997)
+    %3814 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3998)
+    %3815 = "ttir.sqrt"(%3813, %3814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3998)
+    %3816 = tensor.empty() : tensor<1x12x1xf32> loc(#loc3999)
+    %3817 = "ttir.reciprocal"(%3815, %3816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc3999)
+    %3818 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4000)
+    %3819 = "ttir.multiply"(%3807, %3817, %3818) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4000)
+    %3820 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4001)
+    %3821 = "ttir.multiply"(%arg502, %3819, %3820) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4001)
+    %3822 = tensor.empty() : tensor<12x3200xf32> loc(#loc4002)
+    %3823 = "ttir.squeeze"(%3821, %3822) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4002)
+    %3824 = tensor.empty() : tensor<12x8640xf32> loc(#loc4003)
+    %3825 = "ttir.matmul"(%3823, %arg503, %3824) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc4003)
+    %3826 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4004)
+    %3827 = "ttir.unsqueeze"(%3825, %3826) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4004)
+    %3828 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4005)
+    %3829 = "ttir.sigmoid"(%3827, %3828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4005)
+    %3830 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4006)
+    %3831 = "ttir.multiply"(%3827, %3829, %3830) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4006)
+    %3832 = tensor.empty() : tensor<12x8640xf32> loc(#loc4007)
+    %3833 = "ttir.matmul"(%3823, %arg504, %3832) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc4007)
+    %3834 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4008)
+    %3835 = "ttir.unsqueeze"(%3833, %3834) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4008)
+    %3836 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4009)
+    %3837 = "ttir.multiply"(%3831, %3835, %3836) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4009)
+    %3838 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4010)
+    %3839 = "ttir.matmul"(%3837, %arg505, %3838) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4010)
+    %3840 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4011)
+    %3841 = "ttir.add"(%3807, %3839, %3840) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4011)
+    %3842 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4012)
+    %3843 = "ttir.multiply"(%3841, %3841, %3842) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4012)
+    %3844 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4013)
+    %3845 = "ttir.mean"(%3843, %3844) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4013)
+    %3846 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4014)
+    %3847 = "ttir.add"(%3845, %arg265, %3846) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4014)
+    %3848 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4015)
+    %3849 = "ttir.sqrt"(%3847, %3848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4015)
+    %3850 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4016)
+    %3851 = "ttir.reciprocal"(%3849, %3850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4016)
+    %3852 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4017)
+    %3853 = "ttir.multiply"(%3841, %3851, %3852) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4017)
+    %3854 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4018)
+    %3855 = "ttir.multiply"(%arg506, %3853, %3854) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4018)
+    %3856 = tensor.empty() : tensor<12x3200xf32> loc(#loc4019)
+    %3857 = "ttir.squeeze"(%3855, %3856) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4019)
+    %3858 = tensor.empty() : tensor<12x3200xf32> loc(#loc4020)
+    %3859 = "ttir.matmul"(%3857, %arg507, %3858) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4020)
+    %3860 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc4021)
+    %3861 = "ttir.reshape"(%3859, %3860) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc4021)
+    %3862 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4022)
+    %3863 = "ttir.transpose"(%3861, %3862) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4022)
+    %3864 = tensor.empty() : tensor<1x12x100xf32> loc(#loc4023)
+    %3865 = "ttir.concat"(%arg266, %arg266, %3864) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc4023)
+    %3866 = tensor.empty() : tensor<1x12x100xf32> loc(#loc4024)
+    %3867 = "ttir.sin"(%3865, %3866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc4024)
+    %3868 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc4025)
+    %3869 = "ttir.unsqueeze"(%3867, %3868) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc4025)
+    %3870 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4026)
+    %3871 = "ttir.multiply"(%3863, %3869, %3870) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4026)
+    %3872 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc4027)
+    %3873 = "ttir.transpose"(%3863, %3872) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc4027)
+    %3874 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc4028)
+    %3875 = "ttir.matmul"(%arg267, %3873, %3874) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc4028)
+    %3876 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4029)
+    %3877 = "ttir.transpose"(%3875, %3876) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4029)
+    %3878 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4030)
+    %3879 = "ttir.multiply"(%3877, %arg268, %3878) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4030)
+    %3880 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc4031)
+    %3881 = "ttir.transpose"(%3863, %3880) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc4031)
+    %3882 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc4032)
+    %3883 = "ttir.matmul"(%arg269, %3881, %3882) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc4032)
+    %3884 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4033)
+    %3885 = "ttir.transpose"(%3883, %3884) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4033)
+    %3886 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4034)
+    %3887 = "ttir.concat"(%3879, %3885, %3886) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4034)
+    %3888 = tensor.empty() : tensor<1x12x100xf32> loc(#loc4035)
+    %3889 = "ttir.cos"(%3865, %3888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc4035)
+    %3890 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc4036)
+    %3891 = "ttir.unsqueeze"(%3889, %3890) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc4036)
+    %3892 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4037)
+    %3893 = "ttir.multiply"(%3887, %3891, %3892) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4037)
+    %3894 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4038)
+    %3895 = "ttir.add"(%3871, %3893, %3894) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4038)
+    %3896 = tensor.empty() : tensor<32x12x100xf32> loc(#loc4039)
+    %3897 = "ttir.squeeze"(%3895, %3896) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc4039)
+    %3898 = tensor.empty() : tensor<12x3200xf32> loc(#loc4040)
+    %3899 = "ttir.matmul"(%3857, %arg508, %3898) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4040)
+    %3900 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc4041)
+    %3901 = "ttir.reshape"(%3899, %3900) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc4041)
+    %3902 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4042)
+    %3903 = "ttir.transpose"(%3901, %3902) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4042)
+    %3904 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4043)
+    %3905 = "ttir.multiply"(%3903, %3869, %3904) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4043)
+    %3906 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc4044)
+    %3907 = "ttir.transpose"(%3903, %3906) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc4044)
+    %3908 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc4045)
+    %3909 = "ttir.matmul"(%arg270, %3907, %3908) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc4045)
+    %3910 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4046)
+    %3911 = "ttir.transpose"(%3909, %3910) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4046)
+    %3912 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4047)
+    %3913 = "ttir.multiply"(%3911, %arg271, %3912) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4047)
+    %3914 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc4048)
+    %3915 = "ttir.transpose"(%3903, %3914) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc4048)
+    %3916 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc4049)
+    %3917 = "ttir.matmul"(%arg272, %3915, %3916) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc4049)
+    %3918 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4050)
+    %3919 = "ttir.transpose"(%3917, %3918) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4050)
+    %3920 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4051)
+    %3921 = "ttir.concat"(%3913, %3919, %3920) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4051)
+    %3922 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4052)
+    %3923 = "ttir.multiply"(%3921, %3891, %3922) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4052)
+    %3924 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4053)
+    %3925 = "ttir.add"(%3905, %3923, %3924) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4053)
+    %3926 = tensor.empty() : tensor<32x12x100xf32> loc(#loc4054)
+    %3927 = "ttir.squeeze"(%3925, %3926) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc4054)
+    %3928 = tensor.empty() : tensor<32x100x12xf32> loc(#loc4055)
+    %3929 = "ttir.transpose"(%3927, %3928) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc4055)
+    %3930 = tensor.empty() : tensor<32x12x12xf32> loc(#loc4056)
+    %3931 = "ttir.matmul"(%3897, %3929, %3930) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc4056)
+    %3932 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc4057)
+    %3933 = "ttir.unsqueeze"(%3931, %3932) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc4057)
+    %3934 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc4058)
+    %3935 = "ttir.multiply"(%3933, %arg273, %3934) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc4058)
+    %3936 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc4059)
+    %3937 = "ttir.add"(%3935, %arg274, %3936) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc4059)
+    %3938 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc4060)
+    %3939 = "ttir.softmax"(%3937, %3938) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc4060)
+    %3940 = tensor.empty() : tensor<32x12x12xf32> loc(#loc4061)
+    %3941 = "ttir.squeeze"(%3939, %3940) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc4061)
+    %3942 = tensor.empty() : tensor<12x3200xf32> loc(#loc4062)
+    %3943 = "ttir.matmul"(%3857, %arg509, %3942) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4062)
+    %3944 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc4063)
+    %3945 = "ttir.reshape"(%3943, %3944) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc4063)
+    %3946 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4064)
+    %3947 = "ttir.transpose"(%3945, %3946) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4064)
+    %3948 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc4065)
+    %3949 = "ttir.transpose"(%3947, %3948) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc4065)
+    %3950 = tensor.empty() : tensor<32x100x12xf32> loc(#loc4066)
+    %3951 = "ttir.squeeze"(%3949, %3950) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc4066)
+    %3952 = tensor.empty() : tensor<32x12x100xf32> loc(#loc4067)
+    %3953 = "ttir.transpose"(%3951, %3952) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc4067)
+    %3954 = tensor.empty() : tensor<32x12x100xf32> loc(#loc4068)
+    %3955 = "ttir.matmul"(%3941, %3953, %3954) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc4068)
+    %3956 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4069)
+    %3957 = "ttir.unsqueeze"(%3955, %3956) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4069)
+    %3958 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc4070)
+    %3959 = "ttir.transpose"(%3957, %3958) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc4070)
+    %3960 = tensor.empty() : tensor<12x3200xf32> loc(#loc4071)
+    %3961 = "ttir.reshape"(%3959, %3960) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4071)
+    %3962 = tensor.empty() : tensor<12x3200xf32> loc(#loc4072)
+    %3963 = "ttir.matmul"(%3961, %arg510, %3962) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4072)
+    %3964 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4073)
+    %3965 = "ttir.unsqueeze"(%3963, %3964) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4073)
+    %3966 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4074)
+    %3967 = "ttir.add"(%3841, %3965, %3966) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4074)
+    %3968 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4075)
+    %3969 = "ttir.multiply"(%3967, %3967, %3968) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4075)
+    %3970 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4076)
+    %3971 = "ttir.mean"(%3969, %3970) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4076)
+    %3972 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4077)
+    %3973 = "ttir.add"(%3971, %arg275, %3972) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4077)
+    %3974 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4078)
+    %3975 = "ttir.sqrt"(%3973, %3974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4078)
+    %3976 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4079)
+    %3977 = "ttir.reciprocal"(%3975, %3976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4079)
+    %3978 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4080)
+    %3979 = "ttir.multiply"(%3967, %3977, %3978) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4080)
+    %3980 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4081)
+    %3981 = "ttir.multiply"(%arg511, %3979, %3980) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4081)
+    %3982 = tensor.empty() : tensor<12x3200xf32> loc(#loc4082)
+    %3983 = "ttir.squeeze"(%3981, %3982) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4082)
+    %3984 = tensor.empty() : tensor<12x8640xf32> loc(#loc4083)
+    %3985 = "ttir.matmul"(%3983, %arg512, %3984) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc4083)
+    %3986 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4084)
+    %3987 = "ttir.unsqueeze"(%3985, %3986) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4084)
+    %3988 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4085)
+    %3989 = "ttir.sigmoid"(%3987, %3988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4085)
+    %3990 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4086)
+    %3991 = "ttir.multiply"(%3987, %3989, %3990) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4086)
+    %3992 = tensor.empty() : tensor<12x8640xf32> loc(#loc4087)
+    %3993 = "ttir.matmul"(%3983, %arg513, %3992) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc4087)
+    %3994 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4088)
+    %3995 = "ttir.unsqueeze"(%3993, %3994) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4088)
+    %3996 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4089)
+    %3997 = "ttir.multiply"(%3991, %3995, %3996) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4089)
+    %3998 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4090)
+    %3999 = "ttir.matmul"(%3997, %arg514, %3998) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4090)
+    %4000 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4091)
+    %4001 = "ttir.add"(%3967, %3999, %4000) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4091)
+    %4002 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4092)
+    %4003 = "ttir.multiply"(%4001, %4001, %4002) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4092)
+    %4004 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4093)
+    %4005 = "ttir.mean"(%4003, %4004) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4093)
+    %4006 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4094)
+    %4007 = "ttir.add"(%4005, %arg276, %4006) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4094)
+    %4008 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4095)
+    %4009 = "ttir.sqrt"(%4007, %4008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4095)
+    %4010 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4096)
+    %4011 = "ttir.reciprocal"(%4009, %4010) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4096)
+    %4012 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4097)
+    %4013 = "ttir.multiply"(%4001, %4011, %4012) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4097)
+    %4014 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4098)
+    %4015 = "ttir.multiply"(%arg515, %4013, %4014) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4098)
+    %4016 = tensor.empty() : tensor<12x3200xf32> loc(#loc4099)
+    %4017 = "ttir.squeeze"(%4015, %4016) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4099)
+    %4018 = tensor.empty() : tensor<12x3200xf32> loc(#loc4100)
+    %4019 = "ttir.matmul"(%4017, %arg516, %4018) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4100)
+    %4020 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc4101)
+    %4021 = "ttir.reshape"(%4019, %4020) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc4101)
+    %4022 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4102)
+    %4023 = "ttir.transpose"(%4021, %4022) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4102)
+    %4024 = tensor.empty() : tensor<1x12x100xf32> loc(#loc4103)
+    %4025 = "ttir.concat"(%arg277, %arg277, %4024) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc4103)
+    %4026 = tensor.empty() : tensor<1x12x100xf32> loc(#loc4104)
+    %4027 = "ttir.sin"(%4025, %4026) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc4104)
+    %4028 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc4105)
+    %4029 = "ttir.unsqueeze"(%4027, %4028) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc4105)
+    %4030 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4106)
+    %4031 = "ttir.multiply"(%4023, %4029, %4030) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4106)
+    %4032 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc4107)
+    %4033 = "ttir.transpose"(%4023, %4032) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc4107)
+    %4034 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc4108)
+    %4035 = "ttir.matmul"(%arg278, %4033, %4034) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc4108)
+    %4036 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4109)
+    %4037 = "ttir.transpose"(%4035, %4036) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4109)
+    %4038 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4110)
+    %4039 = "ttir.multiply"(%4037, %arg279, %4038) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4110)
+    %4040 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc4111)
+    %4041 = "ttir.transpose"(%4023, %4040) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc4111)
+    %4042 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc4112)
+    %4043 = "ttir.matmul"(%arg280, %4041, %4042) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc4112)
+    %4044 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4113)
+    %4045 = "ttir.transpose"(%4043, %4044) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4113)
+    %4046 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4114)
+    %4047 = "ttir.concat"(%4039, %4045, %4046) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4114)
+    %4048 = tensor.empty() : tensor<1x12x100xf32> loc(#loc4115)
+    %4049 = "ttir.cos"(%4025, %4048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc4115)
+    %4050 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc4116)
+    %4051 = "ttir.unsqueeze"(%4049, %4050) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc4116)
+    %4052 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4117)
+    %4053 = "ttir.multiply"(%4047, %4051, %4052) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4117)
+    %4054 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4118)
+    %4055 = "ttir.add"(%4031, %4053, %4054) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4118)
+    %4056 = tensor.empty() : tensor<32x12x100xf32> loc(#loc4119)
+    %4057 = "ttir.squeeze"(%4055, %4056) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc4119)
+    %4058 = tensor.empty() : tensor<12x3200xf32> loc(#loc4120)
+    %4059 = "ttir.matmul"(%4017, %arg517, %4058) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4120)
+    %4060 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc4121)
+    %4061 = "ttir.reshape"(%4059, %4060) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc4121)
+    %4062 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4122)
+    %4063 = "ttir.transpose"(%4061, %4062) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4122)
+    %4064 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4123)
+    %4065 = "ttir.multiply"(%4063, %4029, %4064) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4123)
+    %4066 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc4124)
+    %4067 = "ttir.transpose"(%4063, %4066) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc4124)
+    %4068 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc4125)
+    %4069 = "ttir.matmul"(%arg281, %4067, %4068) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc4125)
+    %4070 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4126)
+    %4071 = "ttir.transpose"(%4069, %4070) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4126)
+    %4072 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4127)
+    %4073 = "ttir.multiply"(%4071, %arg282, %4072) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4127)
+    %4074 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc4128)
+    %4075 = "ttir.transpose"(%4063, %4074) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc4128)
+    %4076 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc4129)
+    %4077 = "ttir.matmul"(%arg283, %4075, %4076) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc4129)
+    %4078 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc4130)
+    %4079 = "ttir.transpose"(%4077, %4078) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc4130)
+    %4080 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4131)
+    %4081 = "ttir.concat"(%4073, %4079, %4080) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4131)
+    %4082 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4132)
+    %4083 = "ttir.multiply"(%4081, %4051, %4082) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4132)
+    %4084 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4133)
+    %4085 = "ttir.add"(%4065, %4083, %4084) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4133)
+    %4086 = tensor.empty() : tensor<32x12x100xf32> loc(#loc4134)
+    %4087 = "ttir.squeeze"(%4085, %4086) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc4134)
+    %4088 = tensor.empty() : tensor<32x100x12xf32> loc(#loc4135)
+    %4089 = "ttir.transpose"(%4087, %4088) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc4135)
+    %4090 = tensor.empty() : tensor<32x12x12xf32> loc(#loc4136)
+    %4091 = "ttir.matmul"(%4057, %4089, %4090) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc4136)
+    %4092 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc4137)
+    %4093 = "ttir.unsqueeze"(%4091, %4092) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc4137)
+    %4094 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc4138)
+    %4095 = "ttir.multiply"(%4093, %arg284, %4094) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc4138)
+    %4096 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc4139)
+    %4097 = "ttir.add"(%4095, %arg285, %4096) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc4139)
+    %4098 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc4140)
+    %4099 = "ttir.softmax"(%4097, %4098) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc4140)
+    %4100 = tensor.empty() : tensor<32x12x12xf32> loc(#loc4141)
+    %4101 = "ttir.squeeze"(%4099, %4100) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc4141)
+    %4102 = tensor.empty() : tensor<12x3200xf32> loc(#loc4142)
+    %4103 = "ttir.matmul"(%4017, %arg518, %4102) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4142)
+    %4104 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc4143)
+    %4105 = "ttir.reshape"(%4103, %4104) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc4143)
+    %4106 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4144)
+    %4107 = "ttir.transpose"(%4105, %4106) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4144)
+    %4108 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc4145)
+    %4109 = "ttir.transpose"(%4107, %4108) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc4145)
+    %4110 = tensor.empty() : tensor<32x100x12xf32> loc(#loc4146)
+    %4111 = "ttir.squeeze"(%4109, %4110) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc4146)
+    %4112 = tensor.empty() : tensor<32x12x100xf32> loc(#loc4147)
+    %4113 = "ttir.transpose"(%4111, %4112) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc4147)
+    %4114 = tensor.empty() : tensor<32x12x100xf32> loc(#loc4148)
+    %4115 = "ttir.matmul"(%4101, %4113, %4114) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc4148)
+    %4116 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc4149)
+    %4117 = "ttir.unsqueeze"(%4115, %4116) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc4149)
+    %4118 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc4150)
+    %4119 = "ttir.transpose"(%4117, %4118) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc4150)
+    %4120 = tensor.empty() : tensor<12x3200xf32> loc(#loc4151)
+    %4121 = "ttir.reshape"(%4119, %4120) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4151)
+    %4122 = tensor.empty() : tensor<12x3200xf32> loc(#loc4152)
+    %4123 = "ttir.matmul"(%4121, %arg519, %4122) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4152)
+    %4124 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4153)
+    %4125 = "ttir.unsqueeze"(%4123, %4124) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4153)
+    %4126 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4154)
+    %4127 = "ttir.add"(%4001, %4125, %4126) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4154)
+    %4128 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4155)
+    %4129 = "ttir.multiply"(%4127, %4127, %4128) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4155)
+    %4130 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4156)
+    %4131 = "ttir.mean"(%4129, %4130) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4156)
+    %4132 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4157)
+    %4133 = "ttir.add"(%4131, %arg286, %4132) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4157)
+    %4134 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4158)
+    %4135 = "ttir.sqrt"(%4133, %4134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4158)
+    %4136 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4159)
+    %4137 = "ttir.reciprocal"(%4135, %4136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4159)
+    %4138 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4160)
+    %4139 = "ttir.multiply"(%4127, %4137, %4138) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4160)
+    %4140 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4161)
+    %4141 = "ttir.multiply"(%arg520, %4139, %4140) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4161)
+    %4142 = tensor.empty() : tensor<12x3200xf32> loc(#loc4162)
+    %4143 = "ttir.squeeze"(%4141, %4142) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc4162)
+    %4144 = tensor.empty() : tensor<12x8640xf32> loc(#loc4163)
+    %4145 = "ttir.matmul"(%4143, %arg521, %4144) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc4163)
+    %4146 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4164)
+    %4147 = "ttir.unsqueeze"(%4145, %4146) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4164)
+    %4148 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4165)
+    %4149 = "ttir.sigmoid"(%4147, %4148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4165)
+    %4150 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4166)
+    %4151 = "ttir.multiply"(%4147, %4149, %4150) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4166)
+    %4152 = tensor.empty() : tensor<12x8640xf32> loc(#loc4167)
+    %4153 = "ttir.matmul"(%4143, %arg522, %4152) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc4167)
+    %4154 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4168)
+    %4155 = "ttir.unsqueeze"(%4153, %4154) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4168)
+    %4156 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc4169)
+    %4157 = "ttir.multiply"(%4151, %4155, %4156) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc4169)
+    %4158 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4170)
+    %4159 = "ttir.matmul"(%4157, %arg523, %4158) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4170)
+    %4160 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4171)
+    %4161 = "ttir.add"(%4127, %4159, %4160) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4171)
+    %4162 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4172)
+    %4163 = "ttir.multiply"(%4161, %4161, %4162) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4172)
+    %4164 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4173)
+    %4165 = "ttir.mean"(%4163, %4164) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4173)
+    %4166 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4174)
+    %4167 = "ttir.add"(%4165, %arg287, %4166) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4174)
+    %4168 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4175)
+    %4169 = "ttir.sqrt"(%4167, %4168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4175)
+    %4170 = tensor.empty() : tensor<1x12x1xf32> loc(#loc4176)
+    %4171 = "ttir.reciprocal"(%4169, %4170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc4176)
+    %4172 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4177)
+    %4173 = "ttir.multiply"(%4161, %4171, %4172) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4177)
+    %4174 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc4178)
+    %4175 = "ttir.multiply"(%arg288, %4173, %4174) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc4178)
+    %4176 = tensor.empty() : tensor<1x12x32000xf32> loc(#loc4179)
+    %4177 = "ttir.matmul"(%4175, %arg524, %4176) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<3200x32000xf32>, tensor<1x12x32000xf32>) -> tensor<1x12x32000xf32> loc(#loc4179)
+    return %4177 : tensor<1x12x32000xf32> loc(#loc2090)
+  } loc(#loc)
+} loc(#loc)
+#loc1 = loc("forward":4294967295:4184)
+#loc2 = loc("forward":4294967295:4186)
+#loc3 = loc("forward":4294967295:4187)
+#loc4 = loc("forward":4294967295:4189)
+#loc5 = loc("forward":4294967295:4190)
+#loc6 = loc("forward":4294967295:4191)
+#loc7 = loc("forward":4294967295:4192)
+#loc8 = loc("forward":4294967295:4193)
+#loc9 = loc("forward":4294967295:4194)
+#loc10 = loc("forward":4294967295:4196)
+#loc11 = loc("forward":4294967295:4197)
+#loc12 = loc("forward":4294967295:4198)
+#loc13 = loc("forward":4294967295:4200)
+#loc14 = loc("forward":4294967295:4201)
+#loc15 = loc("forward":4294967295:4202)
+#loc16 = loc("forward":4294967295:4203)
+#loc17 = loc("forward":4294967295:4205)
+#loc18 = loc("forward":4294967295:4206)
+#loc19 = loc("forward":4294967295:4207)
+#loc20 = loc("forward":4294967295:4209)
+#loc21 = loc("forward":4294967295:4211)
+#loc22 = loc("forward":4294967295:4212)
+#loc23 = loc("forward":4294967295:4213)
+#loc24 = loc("forward":4294967295:4214)
+#loc25 = loc("forward":4294967295:4215)
+#loc26 = loc("forward":4294967295:4216)
+#loc27 = loc("forward":4294967295:4217)
+#loc28 = loc("forward":4294967295:4218)
+#loc29 = loc("forward":4294967295:4219)
+#loc30 = loc("forward":4294967295:4221)
+#loc31 = loc("forward":4294967295:4222)
+#loc32 = loc("forward":4294967295:4223)
+#loc33 = loc("forward":4294967295:4224)
+#loc34 = loc("forward":4294967295:4226)
+#loc35 = loc("forward":4294967295:4227)
+#loc36 = loc("forward":4294967295:4228)
+#loc37 = loc("forward":4294967295:4230)
+#loc38 = loc("forward":4294967295:4232)
+#loc39 = loc("forward":4294967295:4233)
+#loc40 = loc("forward":4294967295:4234)
+#loc41 = loc("forward":4294967295:4235)
+#loc42 = loc("forward":4294967295:4236)
+#loc43 = loc("forward":4294967295:4237)
+#loc44 = loc("forward":4294967295:4238)
+#loc45 = loc("forward":4294967295:4239)
+#loc46 = loc("forward":4294967295:4240)
+#loc47 = loc("forward":4294967295:4241)
+#loc48 = loc("forward":4294967295:4243)
+#loc49 = loc("forward":4294967295:4245)
+#loc50 = loc("forward":4294967295:4246)
+#loc51 = loc("forward":4294967295:4247)
+#loc52 = loc("forward":4294967295:4249)
+#loc53 = loc("forward":4294967295:4250)
+#loc54 = loc("forward":4294967295:4251)
+#loc55 = loc("forward":4294967295:4252)
+#loc56 = loc("forward":4294967295:4253)
+#loc57 = loc("forward":4294967295:4254)
+#loc58 = loc("forward":4294967295:4255)
+#loc59 = loc("forward":4294967295:4256)
+#loc60 = loc("forward":4294967295:4257)
+#loc61 = loc("forward":4294967295:4258)
+#loc62 = loc("forward":4294967295:4260)
+#loc63 = loc("forward":4294967295:4261)
+#loc64 = loc("forward":4294967295:4262)
+#loc65 = loc("forward":4294967295:4264)
+#loc66 = loc("forward":4294967295:4265)
+#loc67 = loc("forward":4294967295:4267)
+#loc68 = loc("forward":4294967295:4268)
+#loc69 = loc("forward":4294967295:4269)
+#loc70 = loc("forward":4294967295:4270)
+#loc71 = loc("forward":4294967295:4271)
+#loc72 = loc("forward":4294967295:4272)
+#loc73 = loc("forward":4294967295:4274)
+#loc74 = loc("forward":4294967295:4275)
+#loc75 = loc("forward":4294967295:4276)
+#loc76 = loc("forward":4294967295:4277)
+#loc77 = loc("forward":4294967295:4279)
+#loc78 = loc("forward":4294967295:4280)
+#loc79 = loc("forward":4294967295:4281)
+#loc80 = loc("forward":4294967295:4283)
+#loc81 = loc("forward":4294967295:4284)
+#loc82 = loc("forward":4294967295:4286)
+#loc83 = loc("forward":4294967295:4287)
+#loc84 = loc("forward":4294967295:4289)
+#loc85 = loc("forward":4294967295:4290)
+#loc86 = loc("forward":4294967295:4291)
+#loc87 = loc("forward":4294967295:4292)
+#loc88 = loc("forward":4294967295:4293)
+#loc89 = loc("forward":4294967295:4294)
+#loc90 = loc("forward":4294967295:4296)
+#loc91 = loc("forward":4294967295:4297)
+#loc92 = loc("forward":4294967295:4298)
+#loc93 = loc("forward":4294967295:4300)
+#loc94 = loc("forward":4294967295:4301)
+#loc95 = loc("forward":4294967295:4302)
+#loc96 = loc("forward":4294967295:4303)
+#loc97 = loc("forward":4294967295:4305)
+#loc98 = loc("forward":4294967295:4306)
+#loc99 = loc("forward":4294967295:4307)
+#loc100 = loc("forward":4294967295:4309)
+#loc101 = loc("forward":4294967295:4311)
+#loc102 = loc("forward":4294967295:4312)
+#loc103 = loc("forward":4294967295:4313)
+#loc104 = loc("forward":4294967295:4314)
+#loc105 = loc("forward":4294967295:4315)
+#loc106 = loc("forward":4294967295:4316)
+#loc107 = loc("forward":4294967295:4317)
+#loc108 = loc("forward":4294967295:4318)
+#loc109 = loc("forward":4294967295:4319)
+#loc110 = loc("forward":4294967295:4321)
+#loc111 = loc("forward":4294967295:4322)
+#loc112 = loc("forward":4294967295:4323)
+#loc113 = loc("forward":4294967295:4324)
+#loc114 = loc("forward":4294967295:4326)
+#loc115 = loc("forward":4294967295:4327)
+#loc116 = loc("forward":4294967295:4328)
+#loc117 = loc("forward":4294967295:4330)
+#loc118 = loc("forward":4294967295:4332)
+#loc119 = loc("forward":4294967295:4333)
+#loc120 = loc("forward":4294967295:4334)
+#loc121 = loc("forward":4294967295:4335)
+#loc122 = loc("forward":4294967295:4336)
+#loc123 = loc("forward":4294967295:4337)
+#loc124 = loc("forward":4294967295:4338)
+#loc125 = loc("forward":4294967295:4339)
+#loc126 = loc("forward":4294967295:4340)
+#loc127 = loc("forward":4294967295:4341)
+#loc128 = loc("forward":4294967295:4343)
+#loc129 = loc("forward":4294967295:4345)
+#loc130 = loc("forward":4294967295:4346)
+#loc131 = loc("forward":4294967295:4347)
+#loc132 = loc("forward":4294967295:4349)
+#loc133 = loc("forward":4294967295:4350)
+#loc134 = loc("forward":4294967295:4351)
+#loc135 = loc("forward":4294967295:4352)
+#loc136 = loc("forward":4294967295:4353)
+#loc137 = loc("forward":4294967295:4354)
+#loc138 = loc("forward":4294967295:4355)
+#loc139 = loc("forward":4294967295:4356)
+#loc140 = loc("forward":4294967295:4357)
+#loc141 = loc("forward":4294967295:4358)
+#loc142 = loc("forward":4294967295:4360)
+#loc143 = loc("forward":4294967295:4361)
+#loc144 = loc("forward":4294967295:4362)
+#loc145 = loc("forward":4294967295:4364)
+#loc146 = loc("forward":4294967295:4365)
+#loc147 = loc("forward":4294967295:4367)
+#loc148 = loc("forward":4294967295:4368)
+#loc149 = loc("forward":4294967295:4369)
+#loc150 = loc("forward":4294967295:4370)
+#loc151 = loc("forward":4294967295:4371)
+#loc152 = loc("forward":4294967295:4372)
+#loc153 = loc("forward":4294967295:4374)
+#loc154 = loc("forward":4294967295:4375)
+#loc155 = loc("forward":4294967295:4376)
+#loc156 = loc("forward":4294967295:4377)
+#loc157 = loc("forward":4294967295:4379)
+#loc158 = loc("forward":4294967295:4380)
+#loc159 = loc("forward":4294967295:4381)
+#loc160 = loc("forward":4294967295:4383)
+#loc161 = loc("forward":4294967295:4384)
+#loc162 = loc("forward":4294967295:4386)
+#loc163 = loc("forward":4294967295:4387)
+#loc164 = loc("forward":4294967295:4389)
+#loc165 = loc("forward":4294967295:4390)
+#loc166 = loc("forward":4294967295:4391)
+#loc167 = loc("forward":4294967295:4392)
+#loc168 = loc("forward":4294967295:4393)
+#loc169 = loc("forward":4294967295:4394)
+#loc170 = loc("forward":4294967295:4396)
+#loc171 = loc("forward":4294967295:4397)
+#loc172 = loc("forward":4294967295:4398)
+#loc173 = loc("forward":4294967295:4400)
+#loc174 = loc("forward":4294967295:4401)
+#loc175 = loc("forward":4294967295:4402)
+#loc176 = loc("forward":4294967295:4403)
+#loc177 = loc("forward":4294967295:4405)
+#loc178 = loc("forward":4294967295:4406)
+#loc179 = loc("forward":4294967295:4407)
+#loc180 = loc("forward":4294967295:4409)
+#loc181 = loc("forward":4294967295:4411)
+#loc182 = loc("forward":4294967295:4412)
+#loc183 = loc("forward":4294967295:4413)
+#loc184 = loc("forward":4294967295:4414)
+#loc185 = loc("forward":4294967295:4415)
+#loc186 = loc("forward":4294967295:4416)
+#loc187 = loc("forward":4294967295:4417)
+#loc188 = loc("forward":4294967295:4418)
+#loc189 = loc("forward":4294967295:4419)
+#loc190 = loc("forward":4294967295:4421)
+#loc191 = loc("forward":4294967295:4422)
+#loc192 = loc("forward":4294967295:4423)
+#loc193 = loc("forward":4294967295:4424)
+#loc194 = loc("forward":4294967295:4426)
+#loc195 = loc("forward":4294967295:4427)
+#loc196 = loc("forward":4294967295:4428)
+#loc197 = loc("forward":4294967295:4430)
+#loc198 = loc("forward":4294967295:4432)
+#loc199 = loc("forward":4294967295:4433)
+#loc200 = loc("forward":4294967295:4434)
+#loc201 = loc("forward":4294967295:4435)
+#loc202 = loc("forward":4294967295:4436)
+#loc203 = loc("forward":4294967295:4437)
+#loc204 = loc("forward":4294967295:4438)
+#loc205 = loc("forward":4294967295:4439)
+#loc206 = loc("forward":4294967295:4440)
+#loc207 = loc("forward":4294967295:4441)
+#loc208 = loc("forward":4294967295:4443)
+#loc209 = loc("forward":4294967295:4445)
+#loc210 = loc("forward":4294967295:4446)
+#loc211 = loc("forward":4294967295:4447)
+#loc212 = loc("forward":4294967295:4449)
+#loc213 = loc("forward":4294967295:4450)
+#loc214 = loc("forward":4294967295:4451)
+#loc215 = loc("forward":4294967295:4452)
+#loc216 = loc("forward":4294967295:4453)
+#loc217 = loc("forward":4294967295:4454)
+#loc218 = loc("forward":4294967295:4455)
+#loc219 = loc("forward":4294967295:4456)
+#loc220 = loc("forward":4294967295:4457)
+#loc221 = loc("forward":4294967295:4458)
+#loc222 = loc("forward":4294967295:4460)
+#loc223 = loc("forward":4294967295:4461)
+#loc224 = loc("forward":4294967295:4462)
+#loc225 = loc("forward":4294967295:4464)
+#loc226 = loc("forward":4294967295:4465)
+#loc227 = loc("forward":4294967295:4467)
+#loc228 = loc("forward":4294967295:4468)
+#loc229 = loc("forward":4294967295:4469)
+#loc230 = loc("forward":4294967295:4470)
+#loc231 = loc("forward":4294967295:4471)
+#loc232 = loc("forward":4294967295:4472)
+#loc233 = loc("forward":4294967295:4474)
+#loc234 = loc("forward":4294967295:4475)
+#loc235 = loc("forward":4294967295:4476)
+#loc236 = loc("forward":4294967295:4477)
+#loc237 = loc("forward":4294967295:4479)
+#loc238 = loc("forward":4294967295:4480)
+#loc239 = loc("forward":4294967295:4481)
+#loc240 = loc("forward":4294967295:4483)
+#loc241 = loc("forward":4294967295:4484)
+#loc242 = loc("forward":4294967295:4486)
+#loc243 = loc("forward":4294967295:4487)
+#loc244 = loc("forward":4294967295:4489)
+#loc245 = loc("forward":4294967295:4490)
+#loc246 = loc("forward":4294967295:4491)
+#loc247 = loc("forward":4294967295:4492)
+#loc248 = loc("forward":4294967295:4493)
+#loc249 = loc("forward":4294967295:4494)
+#loc250 = loc("forward":4294967295:4496)
+#loc251 = loc("forward":4294967295:4497)
+#loc252 = loc("forward":4294967295:4498)
+#loc253 = loc("forward":4294967295:4500)
+#loc254 = loc("forward":4294967295:4501)
+#loc255 = loc("forward":4294967295:4502)
+#loc256 = loc("forward":4294967295:4503)
+#loc257 = loc("forward":4294967295:4505)
+#loc258 = loc("forward":4294967295:4506)
+#loc259 = loc("forward":4294967295:4507)
+#loc260 = loc("forward":4294967295:4509)
+#loc261 = loc("forward":4294967295:4511)
+#loc262 = loc("forward":4294967295:4512)
+#loc263 = loc("forward":4294967295:4513)
+#loc264 = loc("forward":4294967295:4514)
+#loc265 = loc("forward":4294967295:4515)
+#loc266 = loc("forward":4294967295:4516)
+#loc267 = loc("forward":4294967295:4517)
+#loc268 = loc("forward":4294967295:4518)
+#loc269 = loc("forward":4294967295:4519)
+#loc270 = loc("forward":4294967295:4521)
+#loc271 = loc("forward":4294967295:4522)
+#loc272 = loc("forward":4294967295:4523)
+#loc273 = loc("forward":4294967295:4524)
+#loc274 = loc("forward":4294967295:4526)
+#loc275 = loc("forward":4294967295:4527)
+#loc276 = loc("forward":4294967295:4528)
+#loc277 = loc("forward":4294967295:4530)
+#loc278 = loc("forward":4294967295:4532)
+#loc279 = loc("forward":4294967295:4533)
+#loc280 = loc("forward":4294967295:4534)
+#loc281 = loc("forward":4294967295:4535)
+#loc282 = loc("forward":4294967295:4536)
+#loc283 = loc("forward":4294967295:4537)
+#loc284 = loc("forward":4294967295:4538)
+#loc285 = loc("forward":4294967295:4539)
+#loc286 = loc("forward":4294967295:4540)
+#loc287 = loc("forward":4294967295:4541)
+#loc288 = loc("forward":4294967295:4543)
+#loc289 = loc("forward":4294967295:4545)
+#loc290 = loc("forward":4294967295:4546)
+#loc291 = loc("forward":4294967295:4547)
+#loc292 = loc("forward":4294967295:4549)
+#loc293 = loc("forward":4294967295:4550)
+#loc294 = loc("forward":4294967295:4551)
+#loc295 = loc("forward":4294967295:4552)
+#loc296 = loc("forward":4294967295:4553)
+#loc297 = loc("forward":4294967295:4554)
+#loc298 = loc("forward":4294967295:4555)
+#loc299 = loc("forward":4294967295:4556)
+#loc300 = loc("forward":4294967295:4557)
+#loc301 = loc("forward":4294967295:4558)
+#loc302 = loc("forward":4294967295:4560)
+#loc303 = loc("forward":4294967295:4561)
+#loc304 = loc("forward":4294967295:4562)
+#loc305 = loc("forward":4294967295:4564)
+#loc306 = loc("forward":4294967295:4565)
+#loc307 = loc("forward":4294967295:4567)
+#loc308 = loc("forward":4294967295:4568)
+#loc309 = loc("forward":4294967295:4569)
+#loc310 = loc("forward":4294967295:4570)
+#loc311 = loc("forward":4294967295:4571)
+#loc312 = loc("forward":4294967295:4572)
+#loc313 = loc("forward":4294967295:4574)
+#loc314 = loc("forward":4294967295:4575)
+#loc315 = loc("forward":4294967295:4576)
+#loc316 = loc("forward":4294967295:4577)
+#loc317 = loc("forward":4294967295:4579)
+#loc318 = loc("forward":4294967295:4580)
+#loc319 = loc("forward":4294967295:4581)
+#loc320 = loc("forward":4294967295:4583)
+#loc321 = loc("forward":4294967295:4584)
+#loc322 = loc("forward":4294967295:4586)
+#loc323 = loc("forward":4294967295:4587)
+#loc324 = loc("forward":4294967295:4589)
+#loc325 = loc("forward":4294967295:4590)
+#loc326 = loc("forward":4294967295:4591)
+#loc327 = loc("forward":4294967295:4592)
+#loc328 = loc("forward":4294967295:4593)
+#loc329 = loc("forward":4294967295:4594)
+#loc330 = loc("forward":4294967295:4596)
+#loc331 = loc("forward":4294967295:4597)
+#loc332 = loc("forward":4294967295:4598)
+#loc333 = loc("forward":4294967295:4600)
+#loc334 = loc("forward":4294967295:4601)
+#loc335 = loc("forward":4294967295:4602)
+#loc336 = loc("forward":4294967295:4603)
+#loc337 = loc("forward":4294967295:4605)
+#loc338 = loc("forward":4294967295:4606)
+#loc339 = loc("forward":4294967295:4607)
+#loc340 = loc("forward":4294967295:4609)
+#loc341 = loc("forward":4294967295:4611)
+#loc342 = loc("forward":4294967295:4612)
+#loc343 = loc("forward":4294967295:4613)
+#loc344 = loc("forward":4294967295:4614)
+#loc345 = loc("forward":4294967295:4615)
+#loc346 = loc("forward":4294967295:4616)
+#loc347 = loc("forward":4294967295:4617)
+#loc348 = loc("forward":4294967295:4618)
+#loc349 = loc("forward":4294967295:4619)
+#loc350 = loc("forward":4294967295:4621)
+#loc351 = loc("forward":4294967295:4622)
+#loc352 = loc("forward":4294967295:4623)
+#loc353 = loc("forward":4294967295:4624)
+#loc354 = loc("forward":4294967295:4626)
+#loc355 = loc("forward":4294967295:4627)
+#loc356 = loc("forward":4294967295:4628)
+#loc357 = loc("forward":4294967295:4630)
+#loc358 = loc("forward":4294967295:4632)
+#loc359 = loc("forward":4294967295:4633)
+#loc360 = loc("forward":4294967295:4634)
+#loc361 = loc("forward":4294967295:4635)
+#loc362 = loc("forward":4294967295:4636)
+#loc363 = loc("forward":4294967295:4637)
+#loc364 = loc("forward":4294967295:4638)
+#loc365 = loc("forward":4294967295:4639)
+#loc366 = loc("forward":4294967295:4640)
+#loc367 = loc("forward":4294967295:4641)
+#loc368 = loc("forward":4294967295:4643)
+#loc369 = loc("forward":4294967295:4645)
+#loc370 = loc("forward":4294967295:4646)
+#loc371 = loc("forward":4294967295:4647)
+#loc372 = loc("forward":4294967295:4649)
+#loc373 = loc("forward":4294967295:4650)
+#loc374 = loc("forward":4294967295:4651)
+#loc375 = loc("forward":4294967295:4652)
+#loc376 = loc("forward":4294967295:4653)
+#loc377 = loc("forward":4294967295:4654)
+#loc378 = loc("forward":4294967295:4655)
+#loc379 = loc("forward":4294967295:4656)
+#loc380 = loc("forward":4294967295:4657)
+#loc381 = loc("forward":4294967295:4658)
+#loc382 = loc("forward":4294967295:4660)
+#loc383 = loc("forward":4294967295:4661)
+#loc384 = loc("forward":4294967295:4662)
+#loc385 = loc("forward":4294967295:4664)
+#loc386 = loc("forward":4294967295:4665)
+#loc387 = loc("forward":4294967295:4667)
+#loc388 = loc("forward":4294967295:4668)
+#loc389 = loc("forward":4294967295:4669)
+#loc390 = loc("forward":4294967295:4670)
+#loc391 = loc("forward":4294967295:4671)
+#loc392 = loc("forward":4294967295:4672)
+#loc393 = loc("forward":4294967295:4674)
+#loc394 = loc("forward":4294967295:4675)
+#loc395 = loc("forward":4294967295:4676)
+#loc396 = loc("forward":4294967295:4677)
+#loc397 = loc("forward":4294967295:4679)
+#loc398 = loc("forward":4294967295:4680)
+#loc399 = loc("forward":4294967295:4681)
+#loc400 = loc("forward":4294967295:4683)
+#loc401 = loc("forward":4294967295:4684)
+#loc402 = loc("forward":4294967295:4686)
+#loc403 = loc("forward":4294967295:4687)
+#loc404 = loc("forward":4294967295:4689)
+#loc405 = loc("forward":4294967295:4690)
+#loc406 = loc("forward":4294967295:4691)
+#loc407 = loc("forward":4294967295:4692)
+#loc408 = loc("forward":4294967295:4693)
+#loc409 = loc("forward":4294967295:4694)
+#loc410 = loc("forward":4294967295:4696)
+#loc411 = loc("forward":4294967295:4697)
+#loc412 = loc("forward":4294967295:4698)
+#loc413 = loc("forward":4294967295:4700)
+#loc414 = loc("forward":4294967295:4701)
+#loc415 = loc("forward":4294967295:4702)
+#loc416 = loc("forward":4294967295:4703)
+#loc417 = loc("forward":4294967295:4705)
+#loc418 = loc("forward":4294967295:4706)
+#loc419 = loc("forward":4294967295:4707)
+#loc420 = loc("forward":4294967295:4709)
+#loc421 = loc("forward":4294967295:4711)
+#loc422 = loc("forward":4294967295:4712)
+#loc423 = loc("forward":4294967295:4713)
+#loc424 = loc("forward":4294967295:4714)
+#loc425 = loc("forward":4294967295:4715)
+#loc426 = loc("forward":4294967295:4716)
+#loc427 = loc("forward":4294967295:4717)
+#loc428 = loc("forward":4294967295:4718)
+#loc429 = loc("forward":4294967295:4719)
+#loc430 = loc("forward":4294967295:4721)
+#loc431 = loc("forward":4294967295:4722)
+#loc432 = loc("forward":4294967295:4723)
+#loc433 = loc("forward":4294967295:4724)
+#loc434 = loc("forward":4294967295:4726)
+#loc435 = loc("forward":4294967295:4727)
+#loc436 = loc("forward":4294967295:4728)
+#loc437 = loc("forward":4294967295:4730)
+#loc438 = loc("forward":4294967295:4732)
+#loc439 = loc("forward":4294967295:4733)
+#loc440 = loc("forward":4294967295:4734)
+#loc441 = loc("forward":4294967295:4735)
+#loc442 = loc("forward":4294967295:4736)
+#loc443 = loc("forward":4294967295:4737)
+#loc444 = loc("forward":4294967295:4738)
+#loc445 = loc("forward":4294967295:4739)
+#loc446 = loc("forward":4294967295:4740)
+#loc447 = loc("forward":4294967295:4741)
+#loc448 = loc("forward":4294967295:4743)
+#loc449 = loc("forward":4294967295:4745)
+#loc450 = loc("forward":4294967295:4746)
+#loc451 = loc("forward":4294967295:4747)
+#loc452 = loc("forward":4294967295:4749)
+#loc453 = loc("forward":4294967295:4750)
+#loc454 = loc("forward":4294967295:4751)
+#loc455 = loc("forward":4294967295:4752)
+#loc456 = loc("forward":4294967295:4753)
+#loc457 = loc("forward":4294967295:4754)
+#loc458 = loc("forward":4294967295:4755)
+#loc459 = loc("forward":4294967295:4756)
+#loc460 = loc("forward":4294967295:4757)
+#loc461 = loc("forward":4294967295:4758)
+#loc462 = loc("forward":4294967295:4760)
+#loc463 = loc("forward":4294967295:4761)
+#loc464 = loc("forward":4294967295:4762)
+#loc465 = loc("forward":4294967295:4764)
+#loc466 = loc("forward":4294967295:4765)
+#loc467 = loc("forward":4294967295:4767)
+#loc468 = loc("forward":4294967295:4768)
+#loc469 = loc("forward":4294967295:4769)
+#loc470 = loc("forward":4294967295:4770)
+#loc471 = loc("forward":4294967295:4771)
+#loc472 = loc("forward":4294967295:4772)
+#loc473 = loc("forward":4294967295:4774)
+#loc474 = loc("forward":4294967295:4775)
+#loc475 = loc("forward":4294967295:4776)
+#loc476 = loc("forward":4294967295:4777)
+#loc477 = loc("forward":4294967295:4779)
+#loc478 = loc("forward":4294967295:4780)
+#loc479 = loc("forward":4294967295:4781)
+#loc480 = loc("forward":4294967295:4783)
+#loc481 = loc("forward":4294967295:4784)
+#loc482 = loc("forward":4294967295:4786)
+#loc483 = loc("forward":4294967295:4787)
+#loc484 = loc("forward":4294967295:4789)
+#loc485 = loc("forward":4294967295:4790)
+#loc486 = loc("forward":4294967295:4791)
+#loc487 = loc("forward":4294967295:4792)
+#loc488 = loc("forward":4294967295:4793)
+#loc489 = loc("forward":4294967295:4794)
+#loc490 = loc("forward":4294967295:4796)
+#loc491 = loc("forward":4294967295:4797)
+#loc492 = loc("forward":4294967295:4798)
+#loc493 = loc("forward":4294967295:4800)
+#loc494 = loc("forward":4294967295:4801)
+#loc495 = loc("forward":4294967295:4802)
+#loc496 = loc("forward":4294967295:4803)
+#loc497 = loc("forward":4294967295:4805)
+#loc498 = loc("forward":4294967295:4806)
+#loc499 = loc("forward":4294967295:4807)
+#loc500 = loc("forward":4294967295:4809)
+#loc501 = loc("forward":4294967295:4811)
+#loc502 = loc("forward":4294967295:4812)
+#loc503 = loc("forward":4294967295:4813)
+#loc504 = loc("forward":4294967295:4814)
+#loc505 = loc("forward":4294967295:4815)
+#loc506 = loc("forward":4294967295:4816)
+#loc507 = loc("forward":4294967295:4817)
+#loc508 = loc("forward":4294967295:4818)
+#loc509 = loc("forward":4294967295:4819)
+#loc510 = loc("forward":4294967295:4821)
+#loc511 = loc("forward":4294967295:4822)
+#loc512 = loc("forward":4294967295:4823)
+#loc513 = loc("forward":4294967295:4824)
+#loc514 = loc("forward":4294967295:4826)
+#loc515 = loc("forward":4294967295:4827)
+#loc516 = loc("forward":4294967295:4828)
+#loc517 = loc("forward":4294967295:4830)
+#loc518 = loc("forward":4294967295:4832)
+#loc519 = loc("forward":4294967295:4833)
+#loc520 = loc("forward":4294967295:4834)
+#loc521 = loc("forward":4294967295:4835)
+#loc522 = loc("forward":4294967295:4836)
+#loc523 = loc("forward":4294967295:4837)
+#loc524 = loc("forward":4294967295:4838)
+#loc525 = loc("forward":4294967295:4839)
+#loc526 = loc("forward":4294967295:4840)
+#loc527 = loc("forward":4294967295:4841)
+#loc528 = loc("forward":4294967295:4843)
+#loc529 = loc("forward":4294967295:4845)
+#loc530 = loc("forward":4294967295:4846)
+#loc531 = loc("forward":4294967295:4847)
+#loc532 = loc("forward":4294967295:4849)
+#loc533 = loc("forward":4294967295:4850)
+#loc534 = loc("forward":4294967295:4851)
+#loc535 = loc("forward":4294967295:4852)
+#loc536 = loc("forward":4294967295:4853)
+#loc537 = loc("forward":4294967295:4854)
+#loc538 = loc("forward":4294967295:4855)
+#loc539 = loc("forward":4294967295:4856)
+#loc540 = loc("forward":4294967295:4857)
+#loc541 = loc("forward":4294967295:4858)
+#loc542 = loc("forward":4294967295:4860)
+#loc543 = loc("forward":4294967295:4861)
+#loc544 = loc("forward":4294967295:4862)
+#loc545 = loc("forward":4294967295:4864)
+#loc546 = loc("forward":4294967295:4865)
+#loc547 = loc("forward":4294967295:4867)
+#loc548 = loc("forward":4294967295:4868)
+#loc549 = loc("forward":4294967295:4869)
+#loc550 = loc("forward":4294967295:4870)
+#loc551 = loc("forward":4294967295:4871)
+#loc552 = loc("forward":4294967295:4872)
+#loc553 = loc("forward":4294967295:4874)
+#loc554 = loc("forward":4294967295:4875)
+#loc555 = loc("forward":4294967295:4876)
+#loc556 = loc("forward":4294967295:4877)
+#loc557 = loc("forward":4294967295:4879)
+#loc558 = loc("forward":4294967295:4880)
+#loc559 = loc("forward":4294967295:4881)
+#loc560 = loc("forward":4294967295:4883)
+#loc561 = loc("forward":4294967295:4884)
+#loc562 = loc("forward":4294967295:4886)
+#loc563 = loc("forward":4294967295:4887)
+#loc564 = loc("forward":4294967295:4889)
+#loc565 = loc("forward":4294967295:4890)
+#loc566 = loc("forward":4294967295:4891)
+#loc567 = loc("forward":4294967295:4892)
+#loc568 = loc("forward":4294967295:4893)
+#loc569 = loc("forward":4294967295:4894)
+#loc570 = loc("forward":4294967295:4896)
+#loc571 = loc("forward":4294967295:4897)
+#loc572 = loc("forward":4294967295:4898)
+#loc573 = loc("forward":4294967295:4900)
+#loc574 = loc("forward":4294967295:4901)
+#loc575 = loc("forward":4294967295:4902)
+#loc576 = loc("forward":4294967295:4903)
+#loc577 = loc("forward":4294967295:4905)
+#loc578 = loc("forward":4294967295:4906)
+#loc579 = loc("forward":4294967295:4907)
+#loc580 = loc("forward":4294967295:4909)
+#loc581 = loc("forward":4294967295:4911)
+#loc582 = loc("forward":4294967295:4912)
+#loc583 = loc("forward":4294967295:4913)
+#loc584 = loc("forward":4294967295:4914)
+#loc585 = loc("forward":4294967295:4915)
+#loc586 = loc("forward":4294967295:4916)
+#loc587 = loc("forward":4294967295:4917)
+#loc588 = loc("forward":4294967295:4918)
+#loc589 = loc("forward":4294967295:4919)
+#loc590 = loc("forward":4294967295:4921)
+#loc591 = loc("forward":4294967295:4922)
+#loc592 = loc("forward":4294967295:4923)
+#loc593 = loc("forward":4294967295:4924)
+#loc594 = loc("forward":4294967295:4926)
+#loc595 = loc("forward":4294967295:4927)
+#loc596 = loc("forward":4294967295:4928)
+#loc597 = loc("forward":4294967295:4930)
+#loc598 = loc("forward":4294967295:4932)
+#loc599 = loc("forward":4294967295:4933)
+#loc600 = loc("forward":4294967295:4934)
+#loc601 = loc("forward":4294967295:4935)
+#loc602 = loc("forward":4294967295:4936)
+#loc603 = loc("forward":4294967295:4937)
+#loc604 = loc("forward":4294967295:4938)
+#loc605 = loc("forward":4294967295:4939)
+#loc606 = loc("forward":4294967295:4940)
+#loc607 = loc("forward":4294967295:4941)
+#loc608 = loc("forward":4294967295:4943)
+#loc609 = loc("forward":4294967295:4945)
+#loc610 = loc("forward":4294967295:4946)
+#loc611 = loc("forward":4294967295:4947)
+#loc612 = loc("forward":4294967295:4949)
+#loc613 = loc("forward":4294967295:4950)
+#loc614 = loc("forward":4294967295:4951)
+#loc615 = loc("forward":4294967295:4952)
+#loc616 = loc("forward":4294967295:4953)
+#loc617 = loc("forward":4294967295:4954)
+#loc618 = loc("forward":4294967295:4955)
+#loc619 = loc("forward":4294967295:4956)
+#loc620 = loc("forward":4294967295:4957)
+#loc621 = loc("forward":4294967295:4958)
+#loc622 = loc("forward":4294967295:4960)
+#loc623 = loc("forward":4294967295:4961)
+#loc624 = loc("forward":4294967295:4962)
+#loc625 = loc("forward":4294967295:4964)
+#loc626 = loc("forward":4294967295:4965)
+#loc627 = loc("forward":4294967295:4967)
+#loc628 = loc("forward":4294967295:4968)
+#loc629 = loc("forward":4294967295:4969)
+#loc630 = loc("forward":4294967295:4970)
+#loc631 = loc("forward":4294967295:4971)
+#loc632 = loc("forward":4294967295:4972)
+#loc633 = loc("forward":4294967295:4974)
+#loc634 = loc("forward":4294967295:4975)
+#loc635 = loc("forward":4294967295:4976)
+#loc636 = loc("forward":4294967295:4977)
+#loc637 = loc("forward":4294967295:4979)
+#loc638 = loc("forward":4294967295:4980)
+#loc639 = loc("forward":4294967295:4981)
+#loc640 = loc("forward":4294967295:4983)
+#loc641 = loc("forward":4294967295:4984)
+#loc642 = loc("forward":4294967295:4986)
+#loc643 = loc("forward":4294967295:4987)
+#loc644 = loc("forward":4294967295:4989)
+#loc645 = loc("forward":4294967295:4990)
+#loc646 = loc("forward":4294967295:4991)
+#loc647 = loc("forward":4294967295:4992)
+#loc648 = loc("forward":4294967295:4993)
+#loc649 = loc("forward":4294967295:4994)
+#loc650 = loc("forward":4294967295:4996)
+#loc651 = loc("forward":4294967295:4997)
+#loc652 = loc("forward":4294967295:4998)
+#loc653 = loc("forward":4294967295:5000)
+#loc654 = loc("forward":4294967295:5001)
+#loc655 = loc("forward":4294967295:5002)
+#loc656 = loc("forward":4294967295:5003)
+#loc657 = loc("forward":4294967295:5005)
+#loc658 = loc("forward":4294967295:5006)
+#loc659 = loc("forward":4294967295:5007)
+#loc660 = loc("forward":4294967295:5009)
+#loc661 = loc("forward":4294967295:5011)
+#loc662 = loc("forward":4294967295:5012)
+#loc663 = loc("forward":4294967295:5013)
+#loc664 = loc("forward":4294967295:5014)
+#loc665 = loc("forward":4294967295:5015)
+#loc666 = loc("forward":4294967295:5016)
+#loc667 = loc("forward":4294967295:5017)
+#loc668 = loc("forward":4294967295:5018)
+#loc669 = loc("forward":4294967295:5019)
+#loc670 = loc("forward":4294967295:5021)
+#loc671 = loc("forward":4294967295:5022)
+#loc672 = loc("forward":4294967295:5023)
+#loc673 = loc("forward":4294967295:5024)
+#loc674 = loc("forward":4294967295:5026)
+#loc675 = loc("forward":4294967295:5027)
+#loc676 = loc("forward":4294967295:5028)
+#loc677 = loc("forward":4294967295:5030)
+#loc678 = loc("forward":4294967295:5032)
+#loc679 = loc("forward":4294967295:5033)
+#loc680 = loc("forward":4294967295:5034)
+#loc681 = loc("forward":4294967295:5035)
+#loc682 = loc("forward":4294967295:5036)
+#loc683 = loc("forward":4294967295:5037)
+#loc684 = loc("forward":4294967295:5038)
+#loc685 = loc("forward":4294967295:5039)
+#loc686 = loc("forward":4294967295:5040)
+#loc687 = loc("forward":4294967295:5041)
+#loc688 = loc("forward":4294967295:5043)
+#loc689 = loc("forward":4294967295:5045)
+#loc690 = loc("forward":4294967295:5046)
+#loc691 = loc("forward":4294967295:5047)
+#loc692 = loc("forward":4294967295:5049)
+#loc693 = loc("forward":4294967295:5050)
+#loc694 = loc("forward":4294967295:5051)
+#loc695 = loc("forward":4294967295:5052)
+#loc696 = loc("forward":4294967295:5053)
+#loc697 = loc("forward":4294967295:5054)
+#loc698 = loc("forward":4294967295:5055)
+#loc699 = loc("forward":4294967295:5056)
+#loc700 = loc("forward":4294967295:5057)
+#loc701 = loc("forward":4294967295:5058)
+#loc702 = loc("forward":4294967295:5060)
+#loc703 = loc("forward":4294967295:5061)
+#loc704 = loc("forward":4294967295:5062)
+#loc705 = loc("forward":4294967295:5064)
+#loc706 = loc("forward":4294967295:5065)
+#loc707 = loc("forward":4294967295:5067)
+#loc708 = loc("forward":4294967295:5068)
+#loc709 = loc("forward":4294967295:5069)
+#loc710 = loc("forward":4294967295:5070)
+#loc711 = loc("forward":4294967295:5071)
+#loc712 = loc("forward":4294967295:5072)
+#loc713 = loc("forward":4294967295:5074)
+#loc714 = loc("forward":4294967295:5075)
+#loc715 = loc("forward":4294967295:5076)
+#loc716 = loc("forward":4294967295:5077)
+#loc717 = loc("forward":4294967295:5079)
+#loc718 = loc("forward":4294967295:5080)
+#loc719 = loc("forward":4294967295:5081)
+#loc720 = loc("forward":4294967295:5083)
+#loc721 = loc("forward":4294967295:5084)
+#loc722 = loc("forward":4294967295:5086)
+#loc723 = loc("forward":4294967295:5087)
+#loc724 = loc("forward":4294967295:5089)
+#loc725 = loc("forward":4294967295:5090)
+#loc726 = loc("forward":4294967295:5091)
+#loc727 = loc("forward":4294967295:5092)
+#loc728 = loc("forward":4294967295:5093)
+#loc729 = loc("forward":4294967295:5094)
+#loc730 = loc("forward":4294967295:5096)
+#loc731 = loc("forward":4294967295:5097)
+#loc732 = loc("forward":4294967295:5098)
+#loc733 = loc("forward":4294967295:5100)
+#loc734 = loc("forward":4294967295:5101)
+#loc735 = loc("forward":4294967295:5102)
+#loc736 = loc("forward":4294967295:5103)
+#loc737 = loc("forward":4294967295:5105)
+#loc738 = loc("forward":4294967295:5106)
+#loc739 = loc("forward":4294967295:5107)
+#loc740 = loc("forward":4294967295:5109)
+#loc741 = loc("forward":4294967295:5111)
+#loc742 = loc("forward":4294967295:5112)
+#loc743 = loc("forward":4294967295:5113)
+#loc744 = loc("forward":4294967295:5114)
+#loc745 = loc("forward":4294967295:5115)
+#loc746 = loc("forward":4294967295:5116)
+#loc747 = loc("forward":4294967295:5117)
+#loc748 = loc("forward":4294967295:5118)
+#loc749 = loc("forward":4294967295:5119)
+#loc750 = loc("forward":4294967295:5121)
+#loc751 = loc("forward":4294967295:5122)
+#loc752 = loc("forward":4294967295:5123)
+#loc753 = loc("forward":4294967295:5124)
+#loc754 = loc("forward":4294967295:5126)
+#loc755 = loc("forward":4294967295:5127)
+#loc756 = loc("forward":4294967295:5128)
+#loc757 = loc("forward":4294967295:5130)
+#loc758 = loc("forward":4294967295:5132)
+#loc759 = loc("forward":4294967295:5133)
+#loc760 = loc("forward":4294967295:5134)
+#loc761 = loc("forward":4294967295:5135)
+#loc762 = loc("forward":4294967295:5136)
+#loc763 = loc("forward":4294967295:5137)
+#loc764 = loc("forward":4294967295:5138)
+#loc765 = loc("forward":4294967295:5139)
+#loc766 = loc("forward":4294967295:5140)
+#loc767 = loc("forward":4294967295:5141)
+#loc768 = loc("forward":4294967295:5143)
+#loc769 = loc("forward":4294967295:5145)
+#loc770 = loc("forward":4294967295:5146)
+#loc771 = loc("forward":4294967295:5147)
+#loc772 = loc("forward":4294967295:5149)
+#loc773 = loc("forward":4294967295:5150)
+#loc774 = loc("forward":4294967295:5151)
+#loc775 = loc("forward":4294967295:5152)
+#loc776 = loc("forward":4294967295:5153)
+#loc777 = loc("forward":4294967295:5154)
+#loc778 = loc("forward":4294967295:5155)
+#loc779 = loc("forward":4294967295:5156)
+#loc780 = loc("forward":4294967295:5157)
+#loc781 = loc("forward":4294967295:5158)
+#loc782 = loc("forward":4294967295:5160)
+#loc783 = loc("forward":4294967295:5161)
+#loc784 = loc("forward":4294967295:5162)
+#loc785 = loc("forward":4294967295:5164)
+#loc786 = loc("forward":4294967295:5165)
+#loc787 = loc("forward":4294967295:5167)
+#loc788 = loc("forward":4294967295:5168)
+#loc789 = loc("forward":4294967295:5169)
+#loc790 = loc("forward":4294967295:5170)
+#loc791 = loc("forward":4294967295:5171)
+#loc792 = loc("forward":4294967295:5172)
+#loc793 = loc("forward":4294967295:5174)
+#loc794 = loc("forward":4294967295:5175)
+#loc795 = loc("forward":4294967295:5176)
+#loc796 = loc("forward":4294967295:5177)
+#loc797 = loc("forward":4294967295:5179)
+#loc798 = loc("forward":4294967295:5180)
+#loc799 = loc("forward":4294967295:5181)
+#loc800 = loc("forward":4294967295:5183)
+#loc801 = loc("forward":4294967295:5184)
+#loc802 = loc("forward":4294967295:5186)
+#loc803 = loc("forward":4294967295:5187)
+#loc804 = loc("forward":4294967295:5189)
+#loc805 = loc("forward":4294967295:5190)
+#loc806 = loc("forward":4294967295:5191)
+#loc807 = loc("forward":4294967295:5192)
+#loc808 = loc("forward":4294967295:5193)
+#loc809 = loc("forward":4294967295:5194)
+#loc810 = loc("forward":4294967295:5196)
+#loc811 = loc("forward":4294967295:5197)
+#loc812 = loc("forward":4294967295:5198)
+#loc813 = loc("forward":4294967295:5200)
+#loc814 = loc("forward":4294967295:5201)
+#loc815 = loc("forward":4294967295:5202)
+#loc816 = loc("forward":4294967295:5203)
+#loc817 = loc("forward":4294967295:5205)
+#loc818 = loc("forward":4294967295:5206)
+#loc819 = loc("forward":4294967295:5207)
+#loc820 = loc("forward":4294967295:5209)
+#loc821 = loc("forward":4294967295:5211)
+#loc822 = loc("forward":4294967295:5212)
+#loc823 = loc("forward":4294967295:5213)
+#loc824 = loc("forward":4294967295:5214)
+#loc825 = loc("forward":4294967295:5215)
+#loc826 = loc("forward":4294967295:5216)
+#loc827 = loc("forward":4294967295:5217)
+#loc828 = loc("forward":4294967295:5218)
+#loc829 = loc("forward":4294967295:5219)
+#loc830 = loc("forward":4294967295:5221)
+#loc831 = loc("forward":4294967295:5222)
+#loc832 = loc("forward":4294967295:5223)
+#loc833 = loc("forward":4294967295:5224)
+#loc834 = loc("forward":4294967295:5226)
+#loc835 = loc("forward":4294967295:5227)
+#loc836 = loc("forward":4294967295:5228)
+#loc837 = loc("forward":4294967295:5230)
+#loc838 = loc("forward":4294967295:5232)
+#loc839 = loc("forward":4294967295:5233)
+#loc840 = loc("forward":4294967295:5234)
+#loc841 = loc("forward":4294967295:5235)
+#loc842 = loc("forward":4294967295:5236)
+#loc843 = loc("forward":4294967295:5237)
+#loc844 = loc("forward":4294967295:5238)
+#loc845 = loc("forward":4294967295:5239)
+#loc846 = loc("forward":4294967295:5240)
+#loc847 = loc("forward":4294967295:5241)
+#loc848 = loc("forward":4294967295:5243)
+#loc849 = loc("forward":4294967295:5245)
+#loc850 = loc("forward":4294967295:5246)
+#loc851 = loc("forward":4294967295:5247)
+#loc852 = loc("forward":4294967295:5249)
+#loc853 = loc("forward":4294967295:5250)
+#loc854 = loc("forward":4294967295:5251)
+#loc855 = loc("forward":4294967295:5252)
+#loc856 = loc("forward":4294967295:5253)
+#loc857 = loc("forward":4294967295:5254)
+#loc858 = loc("forward":4294967295:5255)
+#loc859 = loc("forward":4294967295:5256)
+#loc860 = loc("forward":4294967295:5257)
+#loc861 = loc("forward":4294967295:5258)
+#loc862 = loc("forward":4294967295:5260)
+#loc863 = loc("forward":4294967295:5261)
+#loc864 = loc("forward":4294967295:5262)
+#loc865 = loc("forward":4294967295:5264)
+#loc866 = loc("forward":4294967295:5265)
+#loc867 = loc("forward":4294967295:5267)
+#loc868 = loc("forward":4294967295:5268)
+#loc869 = loc("forward":4294967295:5269)
+#loc870 = loc("forward":4294967295:5270)
+#loc871 = loc("forward":4294967295:5271)
+#loc872 = loc("forward":4294967295:5272)
+#loc873 = loc("forward":4294967295:5274)
+#loc874 = loc("forward":4294967295:5275)
+#loc875 = loc("forward":4294967295:5276)
+#loc876 = loc("forward":4294967295:5277)
+#loc877 = loc("forward":4294967295:5279)
+#loc878 = loc("forward":4294967295:5280)
+#loc879 = loc("forward":4294967295:5281)
+#loc880 = loc("forward":4294967295:5283)
+#loc881 = loc("forward":4294967295:5284)
+#loc882 = loc("forward":4294967295:5286)
+#loc883 = loc("forward":4294967295:5287)
+#loc884 = loc("forward":4294967295:5289)
+#loc885 = loc("forward":4294967295:5290)
+#loc886 = loc("forward":4294967295:5291)
+#loc887 = loc("forward":4294967295:5292)
+#loc888 = loc("forward":4294967295:5293)
+#loc889 = loc("forward":4294967295:5294)
+#loc890 = loc("forward":4294967295:5296)
+#loc891 = loc("forward":4294967295:5297)
+#loc892 = loc("forward":4294967295:5298)
+#loc893 = loc("forward":4294967295:5300)
+#loc894 = loc("forward":4294967295:5301)
+#loc895 = loc("forward":4294967295:5302)
+#loc896 = loc("forward":4294967295:5303)
+#loc897 = loc("forward":4294967295:5305)
+#loc898 = loc("forward":4294967295:5306)
+#loc899 = loc("forward":4294967295:5307)
+#loc900 = loc("forward":4294967295:5309)
+#loc901 = loc("forward":4294967295:5311)
+#loc902 = loc("forward":4294967295:5312)
+#loc903 = loc("forward":4294967295:5313)
+#loc904 = loc("forward":4294967295:5314)
+#loc905 = loc("forward":4294967295:5315)
+#loc906 = loc("forward":4294967295:5316)
+#loc907 = loc("forward":4294967295:5317)
+#loc908 = loc("forward":4294967295:5318)
+#loc909 = loc("forward":4294967295:5319)
+#loc910 = loc("forward":4294967295:5321)
+#loc911 = loc("forward":4294967295:5322)
+#loc912 = loc("forward":4294967295:5323)
+#loc913 = loc("forward":4294967295:5324)
+#loc914 = loc("forward":4294967295:5326)
+#loc915 = loc("forward":4294967295:5327)
+#loc916 = loc("forward":4294967295:5328)
+#loc917 = loc("forward":4294967295:5330)
+#loc918 = loc("forward":4294967295:5332)
+#loc919 = loc("forward":4294967295:5333)
+#loc920 = loc("forward":4294967295:5334)
+#loc921 = loc("forward":4294967295:5335)
+#loc922 = loc("forward":4294967295:5336)
+#loc923 = loc("forward":4294967295:5337)
+#loc924 = loc("forward":4294967295:5338)
+#loc925 = loc("forward":4294967295:5339)
+#loc926 = loc("forward":4294967295:5340)
+#loc927 = loc("forward":4294967295:5341)
+#loc928 = loc("forward":4294967295:5343)
+#loc929 = loc("forward":4294967295:5345)
+#loc930 = loc("forward":4294967295:5346)
+#loc931 = loc("forward":4294967295:5347)
+#loc932 = loc("forward":4294967295:5349)
+#loc933 = loc("forward":4294967295:5350)
+#loc934 = loc("forward":4294967295:5351)
+#loc935 = loc("forward":4294967295:5352)
+#loc936 = loc("forward":4294967295:5353)
+#loc937 = loc("forward":4294967295:5354)
+#loc938 = loc("forward":4294967295:5355)
+#loc939 = loc("forward":4294967295:5356)
+#loc940 = loc("forward":4294967295:5357)
+#loc941 = loc("forward":4294967295:5358)
+#loc942 = loc("forward":4294967295:5360)
+#loc943 = loc("forward":4294967295:5361)
+#loc944 = loc("forward":4294967295:5362)
+#loc945 = loc("forward":4294967295:5364)
+#loc946 = loc("forward":4294967295:5365)
+#loc947 = loc("forward":4294967295:5367)
+#loc948 = loc("forward":4294967295:5368)
+#loc949 = loc("forward":4294967295:5369)
+#loc950 = loc("forward":4294967295:5370)
+#loc951 = loc("forward":4294967295:5371)
+#loc952 = loc("forward":4294967295:5372)
+#loc953 = loc("forward":4294967295:5374)
+#loc954 = loc("forward":4294967295:5375)
+#loc955 = loc("forward":4294967295:5376)
+#loc956 = loc("forward":4294967295:5377)
+#loc957 = loc("forward":4294967295:5379)
+#loc958 = loc("forward":4294967295:5380)
+#loc959 = loc("forward":4294967295:5381)
+#loc960 = loc("forward":4294967295:5383)
+#loc961 = loc("forward":4294967295:5384)
+#loc962 = loc("forward":4294967295:5386)
+#loc963 = loc("forward":4294967295:5387)
+#loc964 = loc("forward":4294967295:5389)
+#loc965 = loc("forward":4294967295:5390)
+#loc966 = loc("forward":4294967295:5391)
+#loc967 = loc("forward":4294967295:5392)
+#loc968 = loc("forward":4294967295:5393)
+#loc969 = loc("forward":4294967295:5394)
+#loc970 = loc("forward":4294967295:5396)
+#loc971 = loc("forward":4294967295:5397)
+#loc972 = loc("forward":4294967295:5398)
+#loc973 = loc("forward":4294967295:5400)
+#loc974 = loc("forward":4294967295:5401)
+#loc975 = loc("forward":4294967295:5402)
+#loc976 = loc("forward":4294967295:5403)
+#loc977 = loc("forward":4294967295:5405)
+#loc978 = loc("forward":4294967295:5406)
+#loc979 = loc("forward":4294967295:5407)
+#loc980 = loc("forward":4294967295:5409)
+#loc981 = loc("forward":4294967295:5411)
+#loc982 = loc("forward":4294967295:5412)
+#loc983 = loc("forward":4294967295:5413)
+#loc984 = loc("forward":4294967295:5414)
+#loc985 = loc("forward":4294967295:5415)
+#loc986 = loc("forward":4294967295:5416)
+#loc987 = loc("forward":4294967295:5417)
+#loc988 = loc("forward":4294967295:5418)
+#loc989 = loc("forward":4294967295:5419)
+#loc990 = loc("forward":4294967295:5421)
+#loc991 = loc("forward":4294967295:5422)
+#loc992 = loc("forward":4294967295:5423)
+#loc993 = loc("forward":4294967295:5424)
+#loc994 = loc("forward":4294967295:5426)
+#loc995 = loc("forward":4294967295:5427)
+#loc996 = loc("forward":4294967295:5428)
+#loc997 = loc("forward":4294967295:5430)
+#loc998 = loc("forward":4294967295:5432)
+#loc999 = loc("forward":4294967295:5433)
+#loc1000 = loc("forward":4294967295:5434)
+#loc1001 = loc("forward":4294967295:5435)
+#loc1002 = loc("forward":4294967295:5436)
+#loc1003 = loc("forward":4294967295:5437)
+#loc1004 = loc("forward":4294967295:5438)
+#loc1005 = loc("forward":4294967295:5439)
+#loc1006 = loc("forward":4294967295:5440)
+#loc1007 = loc("forward":4294967295:5441)
+#loc1008 = loc("forward":4294967295:5443)
+#loc1009 = loc("forward":4294967295:5445)
+#loc1010 = loc("forward":4294967295:5446)
+#loc1011 = loc("forward":4294967295:5447)
+#loc1012 = loc("forward":4294967295:5449)
+#loc1013 = loc("forward":4294967295:5450)
+#loc1014 = loc("forward":4294967295:5451)
+#loc1015 = loc("forward":4294967295:5452)
+#loc1016 = loc("forward":4294967295:5453)
+#loc1017 = loc("forward":4294967295:5454)
+#loc1018 = loc("forward":4294967295:5455)
+#loc1019 = loc("forward":4294967295:5456)
+#loc1020 = loc("forward":4294967295:5457)
+#loc1021 = loc("forward":4294967295:5458)
+#loc1022 = loc("forward":4294967295:5460)
+#loc1023 = loc("forward":4294967295:5461)
+#loc1024 = loc("forward":4294967295:5462)
+#loc1025 = loc("forward":4294967295:5464)
+#loc1026 = loc("forward":4294967295:5465)
+#loc1027 = loc("forward":4294967295:5467)
+#loc1028 = loc("forward":4294967295:5468)
+#loc1029 = loc("forward":4294967295:5469)
+#loc1030 = loc("forward":4294967295:5470)
+#loc1031 = loc("forward":4294967295:5471)
+#loc1032 = loc("forward":4294967295:5472)
+#loc1033 = loc("forward":4294967295:5474)
+#loc1034 = loc("forward":4294967295:5475)
+#loc1035 = loc("forward":4294967295:5476)
+#loc1036 = loc("forward":4294967295:5477)
+#loc1037 = loc("forward":4294967295:5479)
+#loc1038 = loc("forward":4294967295:5480)
+#loc1039 = loc("forward":4294967295:5481)
+#loc1040 = loc("forward":4294967295:5483)
+#loc1041 = loc("forward":4294967295:5484)
+#loc1042 = loc("forward":4294967295:5486)
+#loc1043 = loc("forward":4294967295:5487)
+#loc1044 = loc("forward":4294967295:5489)
+#loc1045 = loc("forward":4294967295:5490)
+#loc1046 = loc("forward":4294967295:5491)
+#loc1047 = loc("forward":4294967295:5492)
+#loc1048 = loc("forward":4294967295:5493)
+#loc1049 = loc("forward":4294967295:5494)
+#loc1050 = loc("forward":4294967295:5496)
+#loc1051 = loc("forward":4294967295:5497)
+#loc1052 = loc("forward":4294967295:5498)
+#loc1053 = loc("forward":4294967295:5500)
+#loc1054 = loc("forward":4294967295:5501)
+#loc1055 = loc("forward":4294967295:5502)
+#loc1056 = loc("forward":4294967295:5503)
+#loc1057 = loc("forward":4294967295:5505)
+#loc1058 = loc("forward":4294967295:5506)
+#loc1059 = loc("forward":4294967295:5507)
+#loc1060 = loc("forward":4294967295:5509)
+#loc1061 = loc("forward":4294967295:5511)
+#loc1062 = loc("forward":4294967295:5512)
+#loc1063 = loc("forward":4294967295:5513)
+#loc1064 = loc("forward":4294967295:5514)
+#loc1065 = loc("forward":4294967295:5515)
+#loc1066 = loc("forward":4294967295:5516)
+#loc1067 = loc("forward":4294967295:5517)
+#loc1068 = loc("forward":4294967295:5518)
+#loc1069 = loc("forward":4294967295:5519)
+#loc1070 = loc("forward":4294967295:5521)
+#loc1071 = loc("forward":4294967295:5522)
+#loc1072 = loc("forward":4294967295:5523)
+#loc1073 = loc("forward":4294967295:5524)
+#loc1074 = loc("forward":4294967295:5526)
+#loc1075 = loc("forward":4294967295:5527)
+#loc1076 = loc("forward":4294967295:5528)
+#loc1077 = loc("forward":4294967295:5530)
+#loc1078 = loc("forward":4294967295:5532)
+#loc1079 = loc("forward":4294967295:5533)
+#loc1080 = loc("forward":4294967295:5534)
+#loc1081 = loc("forward":4294967295:5535)
+#loc1082 = loc("forward":4294967295:5536)
+#loc1083 = loc("forward":4294967295:5537)
+#loc1084 = loc("forward":4294967295:5538)
+#loc1085 = loc("forward":4294967295:5539)
+#loc1086 = loc("forward":4294967295:5540)
+#loc1087 = loc("forward":4294967295:5541)
+#loc1088 = loc("forward":4294967295:5543)
+#loc1089 = loc("forward":4294967295:5545)
+#loc1090 = loc("forward":4294967295:5546)
+#loc1091 = loc("forward":4294967295:5547)
+#loc1092 = loc("forward":4294967295:5549)
+#loc1093 = loc("forward":4294967295:5550)
+#loc1094 = loc("forward":4294967295:5551)
+#loc1095 = loc("forward":4294967295:5552)
+#loc1096 = loc("forward":4294967295:5553)
+#loc1097 = loc("forward":4294967295:5554)
+#loc1098 = loc("forward":4294967295:5555)
+#loc1099 = loc("forward":4294967295:5556)
+#loc1100 = loc("forward":4294967295:5557)
+#loc1101 = loc("forward":4294967295:5558)
+#loc1102 = loc("forward":4294967295:5560)
+#loc1103 = loc("forward":4294967295:5561)
+#loc1104 = loc("forward":4294967295:5562)
+#loc1105 = loc("forward":4294967295:5564)
+#loc1106 = loc("forward":4294967295:5565)
+#loc1107 = loc("forward":4294967295:5567)
+#loc1108 = loc("forward":4294967295:5568)
+#loc1109 = loc("forward":4294967295:5569)
+#loc1110 = loc("forward":4294967295:5570)
+#loc1111 = loc("forward":4294967295:5571)
+#loc1112 = loc("forward":4294967295:5572)
+#loc1113 = loc("forward":4294967295:5574)
+#loc1114 = loc("forward":4294967295:5575)
+#loc1115 = loc("forward":4294967295:5576)
+#loc1116 = loc("forward":4294967295:5577)
+#loc1117 = loc("forward":4294967295:5579)
+#loc1118 = loc("forward":4294967295:5580)
+#loc1119 = loc("forward":4294967295:5581)
+#loc1120 = loc("forward":4294967295:5583)
+#loc1121 = loc("forward":4294967295:5584)
+#loc1122 = loc("forward":4294967295:5586)
+#loc1123 = loc("forward":4294967295:5587)
+#loc1124 = loc("forward":4294967295:5589)
+#loc1125 = loc("forward":4294967295:5590)
+#loc1126 = loc("forward":4294967295:5591)
+#loc1127 = loc("forward":4294967295:5592)
+#loc1128 = loc("forward":4294967295:5593)
+#loc1129 = loc("forward":4294967295:5594)
+#loc1130 = loc("forward":4294967295:5596)
+#loc1131 = loc("forward":4294967295:5597)
+#loc1132 = loc("forward":4294967295:5598)
+#loc1133 = loc("forward":4294967295:5600)
+#loc1134 = loc("forward":4294967295:5601)
+#loc1135 = loc("forward":4294967295:5602)
+#loc1136 = loc("forward":4294967295:5603)
+#loc1137 = loc("forward":4294967295:5605)
+#loc1138 = loc("forward":4294967295:5606)
+#loc1139 = loc("forward":4294967295:5607)
+#loc1140 = loc("forward":4294967295:5609)
+#loc1141 = loc("forward":4294967295:5611)
+#loc1142 = loc("forward":4294967295:5612)
+#loc1143 = loc("forward":4294967295:5613)
+#loc1144 = loc("forward":4294967295:5614)
+#loc1145 = loc("forward":4294967295:5615)
+#loc1146 = loc("forward":4294967295:5616)
+#loc1147 = loc("forward":4294967295:5617)
+#loc1148 = loc("forward":4294967295:5618)
+#loc1149 = loc("forward":4294967295:5619)
+#loc1150 = loc("forward":4294967295:5621)
+#loc1151 = loc("forward":4294967295:5622)
+#loc1152 = loc("forward":4294967295:5623)
+#loc1153 = loc("forward":4294967295:5624)
+#loc1154 = loc("forward":4294967295:5626)
+#loc1155 = loc("forward":4294967295:5627)
+#loc1156 = loc("forward":4294967295:5628)
+#loc1157 = loc("forward":4294967295:5630)
+#loc1158 = loc("forward":4294967295:5632)
+#loc1159 = loc("forward":4294967295:5633)
+#loc1160 = loc("forward":4294967295:5634)
+#loc1161 = loc("forward":4294967295:5635)
+#loc1162 = loc("forward":4294967295:5636)
+#loc1163 = loc("forward":4294967295:5637)
+#loc1164 = loc("forward":4294967295:5638)
+#loc1165 = loc("forward":4294967295:5639)
+#loc1166 = loc("forward":4294967295:5640)
+#loc1167 = loc("forward":4294967295:5641)
+#loc1168 = loc("forward":4294967295:5643)
+#loc1169 = loc("forward":4294967295:5645)
+#loc1170 = loc("forward":4294967295:5646)
+#loc1171 = loc("forward":4294967295:5647)
+#loc1172 = loc("forward":4294967295:5649)
+#loc1173 = loc("forward":4294967295:5650)
+#loc1174 = loc("forward":4294967295:5651)
+#loc1175 = loc("forward":4294967295:5652)
+#loc1176 = loc("forward":4294967295:5653)
+#loc1177 = loc("forward":4294967295:5654)
+#loc1178 = loc("forward":4294967295:5655)
+#loc1179 = loc("forward":4294967295:5656)
+#loc1180 = loc("forward":4294967295:5657)
+#loc1181 = loc("forward":4294967295:5658)
+#loc1182 = loc("forward":4294967295:5660)
+#loc1183 = loc("forward":4294967295:5661)
+#loc1184 = loc("forward":4294967295:5662)
+#loc1185 = loc("forward":4294967295:5664)
+#loc1186 = loc("forward":4294967295:5665)
+#loc1187 = loc("forward":4294967295:5667)
+#loc1188 = loc("forward":4294967295:5668)
+#loc1189 = loc("forward":4294967295:5669)
+#loc1190 = loc("forward":4294967295:5670)
+#loc1191 = loc("forward":4294967295:5671)
+#loc1192 = loc("forward":4294967295:5672)
+#loc1193 = loc("forward":4294967295:5674)
+#loc1194 = loc("forward":4294967295:5675)
+#loc1195 = loc("forward":4294967295:5676)
+#loc1196 = loc("forward":4294967295:5677)
+#loc1197 = loc("forward":4294967295:5679)
+#loc1198 = loc("forward":4294967295:5680)
+#loc1199 = loc("forward":4294967295:5681)
+#loc1200 = loc("forward":4294967295:5683)
+#loc1201 = loc("forward":4294967295:5684)
+#loc1202 = loc("forward":4294967295:5686)
+#loc1203 = loc("forward":4294967295:5687)
+#loc1204 = loc("forward":4294967295:5689)
+#loc1205 = loc("forward":4294967295:5690)
+#loc1206 = loc("forward":4294967295:5691)
+#loc1207 = loc("forward":4294967295:5692)
+#loc1208 = loc("forward":4294967295:5693)
+#loc1209 = loc("forward":4294967295:5694)
+#loc1210 = loc("forward":4294967295:5696)
+#loc1211 = loc("forward":4294967295:5697)
+#loc1212 = loc("forward":4294967295:5698)
+#loc1213 = loc("forward":4294967295:5700)
+#loc1214 = loc("forward":4294967295:5701)
+#loc1215 = loc("forward":4294967295:5702)
+#loc1216 = loc("forward":4294967295:5703)
+#loc1217 = loc("forward":4294967295:5705)
+#loc1218 = loc("forward":4294967295:5706)
+#loc1219 = loc("forward":4294967295:5707)
+#loc1220 = loc("forward":4294967295:5709)
+#loc1221 = loc("forward":4294967295:5711)
+#loc1222 = loc("forward":4294967295:5712)
+#loc1223 = loc("forward":4294967295:5713)
+#loc1224 = loc("forward":4294967295:5714)
+#loc1225 = loc("forward":4294967295:5715)
+#loc1226 = loc("forward":4294967295:5716)
+#loc1227 = loc("forward":4294967295:5717)
+#loc1228 = loc("forward":4294967295:5718)
+#loc1229 = loc("forward":4294967295:5719)
+#loc1230 = loc("forward":4294967295:5721)
+#loc1231 = loc("forward":4294967295:5722)
+#loc1232 = loc("forward":4294967295:5723)
+#loc1233 = loc("forward":4294967295:5724)
+#loc1234 = loc("forward":4294967295:5726)
+#loc1235 = loc("forward":4294967295:5727)
+#loc1236 = loc("forward":4294967295:5728)
+#loc1237 = loc("forward":4294967295:5730)
+#loc1238 = loc("forward":4294967295:5732)
+#loc1239 = loc("forward":4294967295:5733)
+#loc1240 = loc("forward":4294967295:5734)
+#loc1241 = loc("forward":4294967295:5735)
+#loc1242 = loc("forward":4294967295:5736)
+#loc1243 = loc("forward":4294967295:5737)
+#loc1244 = loc("forward":4294967295:5738)
+#loc1245 = loc("forward":4294967295:5739)
+#loc1246 = loc("forward":4294967295:5740)
+#loc1247 = loc("forward":4294967295:5741)
+#loc1248 = loc("forward":4294967295:5743)
+#loc1249 = loc("forward":4294967295:5745)
+#loc1250 = loc("forward":4294967295:5746)
+#loc1251 = loc("forward":4294967295:5747)
+#loc1252 = loc("forward":4294967295:5749)
+#loc1253 = loc("forward":4294967295:5750)
+#loc1254 = loc("forward":4294967295:5751)
+#loc1255 = loc("forward":4294967295:5752)
+#loc1256 = loc("forward":4294967295:5753)
+#loc1257 = loc("forward":4294967295:5754)
+#loc1258 = loc("forward":4294967295:5755)
+#loc1259 = loc("forward":4294967295:5756)
+#loc1260 = loc("forward":4294967295:5757)
+#loc1261 = loc("forward":4294967295:5758)
+#loc1262 = loc("forward":4294967295:5760)
+#loc1263 = loc("forward":4294967295:5761)
+#loc1264 = loc("forward":4294967295:5762)
+#loc1265 = loc("forward":4294967295:5764)
+#loc1266 = loc("forward":4294967295:5765)
+#loc1267 = loc("forward":4294967295:5767)
+#loc1268 = loc("forward":4294967295:5768)
+#loc1269 = loc("forward":4294967295:5769)
+#loc1270 = loc("forward":4294967295:5770)
+#loc1271 = loc("forward":4294967295:5771)
+#loc1272 = loc("forward":4294967295:5772)
+#loc1273 = loc("forward":4294967295:5774)
+#loc1274 = loc("forward":4294967295:5775)
+#loc1275 = loc("forward":4294967295:5776)
+#loc1276 = loc("forward":4294967295:5777)
+#loc1277 = loc("forward":4294967295:5779)
+#loc1278 = loc("forward":4294967295:5780)
+#loc1279 = loc("forward":4294967295:5781)
+#loc1280 = loc("forward":4294967295:5783)
+#loc1281 = loc("forward":4294967295:5784)
+#loc1282 = loc("forward":4294967295:5786)
+#loc1283 = loc("forward":4294967295:5787)
+#loc1284 = loc("forward":4294967295:5789)
+#loc1285 = loc("forward":4294967295:5790)
+#loc1286 = loc("forward":4294967295:5791)
+#loc1287 = loc("forward":4294967295:5792)
+#loc1288 = loc("forward":4294967295:5793)
+#loc1289 = loc("forward":4294967295:5794)
+#loc1290 = loc("forward":4294967295:5796)
+#loc1291 = loc("forward":4294967295:5797)
+#loc1292 = loc("forward":4294967295:5798)
+#loc1293 = loc("forward":4294967295:5800)
+#loc1294 = loc("forward":4294967295:5801)
+#loc1295 = loc("forward":4294967295:5802)
+#loc1296 = loc("forward":4294967295:5803)
+#loc1297 = loc("forward":4294967295:5805)
+#loc1298 = loc("forward":4294967295:5806)
+#loc1299 = loc("forward":4294967295:5807)
+#loc1300 = loc("forward":4294967295:5809)
+#loc1301 = loc("forward":4294967295:5811)
+#loc1302 = loc("forward":4294967295:5812)
+#loc1303 = loc("forward":4294967295:5813)
+#loc1304 = loc("forward":4294967295:5814)
+#loc1305 = loc("forward":4294967295:5815)
+#loc1306 = loc("forward":4294967295:5816)
+#loc1307 = loc("forward":4294967295:5817)
+#loc1308 = loc("forward":4294967295:5818)
+#loc1309 = loc("forward":4294967295:5819)
+#loc1310 = loc("forward":4294967295:5821)
+#loc1311 = loc("forward":4294967295:5822)
+#loc1312 = loc("forward":4294967295:5823)
+#loc1313 = loc("forward":4294967295:5824)
+#loc1314 = loc("forward":4294967295:5826)
+#loc1315 = loc("forward":4294967295:5827)
+#loc1316 = loc("forward":4294967295:5828)
+#loc1317 = loc("forward":4294967295:5830)
+#loc1318 = loc("forward":4294967295:5832)
+#loc1319 = loc("forward":4294967295:5833)
+#loc1320 = loc("forward":4294967295:5834)
+#loc1321 = loc("forward":4294967295:5835)
+#loc1322 = loc("forward":4294967295:5836)
+#loc1323 = loc("forward":4294967295:5837)
+#loc1324 = loc("forward":4294967295:5838)
+#loc1325 = loc("forward":4294967295:5839)
+#loc1326 = loc("forward":4294967295:5840)
+#loc1327 = loc("forward":4294967295:5841)
+#loc1328 = loc("forward":4294967295:5843)
+#loc1329 = loc("forward":4294967295:5845)
+#loc1330 = loc("forward":4294967295:5846)
+#loc1331 = loc("forward":4294967295:5847)
+#loc1332 = loc("forward":4294967295:5849)
+#loc1333 = loc("forward":4294967295:5850)
+#loc1334 = loc("forward":4294967295:5851)
+#loc1335 = loc("forward":4294967295:5852)
+#loc1336 = loc("forward":4294967295:5853)
+#loc1337 = loc("forward":4294967295:5854)
+#loc1338 = loc("forward":4294967295:5855)
+#loc1339 = loc("forward":4294967295:5856)
+#loc1340 = loc("forward":4294967295:5857)
+#loc1341 = loc("forward":4294967295:5858)
+#loc1342 = loc("forward":4294967295:5860)
+#loc1343 = loc("forward":4294967295:5861)
+#loc1344 = loc("forward":4294967295:5862)
+#loc1345 = loc("forward":4294967295:5864)
+#loc1346 = loc("forward":4294967295:5865)
+#loc1347 = loc("forward":4294967295:5867)
+#loc1348 = loc("forward":4294967295:5868)
+#loc1349 = loc("forward":4294967295:5869)
+#loc1350 = loc("forward":4294967295:5870)
+#loc1351 = loc("forward":4294967295:5871)
+#loc1352 = loc("forward":4294967295:5872)
+#loc1353 = loc("forward":4294967295:5874)
+#loc1354 = loc("forward":4294967295:5875)
+#loc1355 = loc("forward":4294967295:5876)
+#loc1356 = loc("forward":4294967295:5877)
+#loc1357 = loc("forward":4294967295:5879)
+#loc1358 = loc("forward":4294967295:5880)
+#loc1359 = loc("forward":4294967295:5881)
+#loc1360 = loc("forward":4294967295:5883)
+#loc1361 = loc("forward":4294967295:5884)
+#loc1362 = loc("forward":4294967295:5886)
+#loc1363 = loc("forward":4294967295:5887)
+#loc1364 = loc("forward":4294967295:5889)
+#loc1365 = loc("forward":4294967295:5890)
+#loc1366 = loc("forward":4294967295:5891)
+#loc1367 = loc("forward":4294967295:5892)
+#loc1368 = loc("forward":4294967295:5893)
+#loc1369 = loc("forward":4294967295:5894)
+#loc1370 = loc("forward":4294967295:5896)
+#loc1371 = loc("forward":4294967295:5897)
+#loc1372 = loc("forward":4294967295:5898)
+#loc1373 = loc("forward":4294967295:5900)
+#loc1374 = loc("forward":4294967295:5901)
+#loc1375 = loc("forward":4294967295:5902)
+#loc1376 = loc("forward":4294967295:5903)
+#loc1377 = loc("forward":4294967295:5905)
+#loc1378 = loc("forward":4294967295:5906)
+#loc1379 = loc("forward":4294967295:5907)
+#loc1380 = loc("forward":4294967295:5909)
+#loc1381 = loc("forward":4294967295:5911)
+#loc1382 = loc("forward":4294967295:5912)
+#loc1383 = loc("forward":4294967295:5913)
+#loc1384 = loc("forward":4294967295:5914)
+#loc1385 = loc("forward":4294967295:5915)
+#loc1386 = loc("forward":4294967295:5916)
+#loc1387 = loc("forward":4294967295:5917)
+#loc1388 = loc("forward":4294967295:5918)
+#loc1389 = loc("forward":4294967295:5919)
+#loc1390 = loc("forward":4294967295:5921)
+#loc1391 = loc("forward":4294967295:5922)
+#loc1392 = loc("forward":4294967295:5923)
+#loc1393 = loc("forward":4294967295:5924)
+#loc1394 = loc("forward":4294967295:5926)
+#loc1395 = loc("forward":4294967295:5927)
+#loc1396 = loc("forward":4294967295:5928)
+#loc1397 = loc("forward":4294967295:5930)
+#loc1398 = loc("forward":4294967295:5932)
+#loc1399 = loc("forward":4294967295:5933)
+#loc1400 = loc("forward":4294967295:5934)
+#loc1401 = loc("forward":4294967295:5935)
+#loc1402 = loc("forward":4294967295:5936)
+#loc1403 = loc("forward":4294967295:5937)
+#loc1404 = loc("forward":4294967295:5938)
+#loc1405 = loc("forward":4294967295:5939)
+#loc1406 = loc("forward":4294967295:5940)
+#loc1407 = loc("forward":4294967295:5941)
+#loc1408 = loc("forward":4294967295:5943)
+#loc1409 = loc("forward":4294967295:5945)
+#loc1410 = loc("forward":4294967295:5946)
+#loc1411 = loc("forward":4294967295:5947)
+#loc1412 = loc("forward":4294967295:5949)
+#loc1413 = loc("forward":4294967295:5950)
+#loc1414 = loc("forward":4294967295:5951)
+#loc1415 = loc("forward":4294967295:5952)
+#loc1416 = loc("forward":4294967295:5953)
+#loc1417 = loc("forward":4294967295:5954)
+#loc1418 = loc("forward":4294967295:5955)
+#loc1419 = loc("forward":4294967295:5956)
+#loc1420 = loc("forward":4294967295:5957)
+#loc1421 = loc("forward":4294967295:5958)
+#loc1422 = loc("forward":4294967295:5960)
+#loc1423 = loc("forward":4294967295:5961)
+#loc1424 = loc("forward":4294967295:5962)
+#loc1425 = loc("forward":4294967295:5964)
+#loc1426 = loc("forward":4294967295:5965)
+#loc1427 = loc("forward":4294967295:5967)
+#loc1428 = loc("forward":4294967295:5968)
+#loc1429 = loc("forward":4294967295:5969)
+#loc1430 = loc("forward":4294967295:5970)
+#loc1431 = loc("forward":4294967295:5971)
+#loc1432 = loc("forward":4294967295:5972)
+#loc1433 = loc("forward":4294967295:5974)
+#loc1434 = loc("forward":4294967295:5975)
+#loc1435 = loc("forward":4294967295:5976)
+#loc1436 = loc("forward":4294967295:5977)
+#loc1437 = loc("forward":4294967295:5979)
+#loc1438 = loc("forward":4294967295:5980)
+#loc1439 = loc("forward":4294967295:5981)
+#loc1440 = loc("forward":4294967295:5983)
+#loc1441 = loc("forward":4294967295:5984)
+#loc1442 = loc("forward":4294967295:5986)
+#loc1443 = loc("forward":4294967295:5987)
+#loc1444 = loc("forward":4294967295:5989)
+#loc1445 = loc("forward":4294967295:5990)
+#loc1446 = loc("forward":4294967295:5991)
+#loc1447 = loc("forward":4294967295:5992)
+#loc1448 = loc("forward":4294967295:5993)
+#loc1449 = loc("forward":4294967295:5994)
+#loc1450 = loc("forward":4294967295:5996)
+#loc1451 = loc("forward":4294967295:5997)
+#loc1452 = loc("forward":4294967295:5998)
+#loc1453 = loc("forward":4294967295:6000)
+#loc1454 = loc("forward":4294967295:6001)
+#loc1455 = loc("forward":4294967295:6002)
+#loc1456 = loc("forward":4294967295:6003)
+#loc1457 = loc("forward":4294967295:6005)
+#loc1458 = loc("forward":4294967295:6006)
+#loc1459 = loc("forward":4294967295:6007)
+#loc1460 = loc("forward":4294967295:6009)
+#loc1461 = loc("forward":4294967295:6011)
+#loc1462 = loc("forward":4294967295:6012)
+#loc1463 = loc("forward":4294967295:6013)
+#loc1464 = loc("forward":4294967295:6014)
+#loc1465 = loc("forward":4294967295:6015)
+#loc1466 = loc("forward":4294967295:6016)
+#loc1467 = loc("forward":4294967295:6017)
+#loc1468 = loc("forward":4294967295:6018)
+#loc1469 = loc("forward":4294967295:6019)
+#loc1470 = loc("forward":4294967295:6021)
+#loc1471 = loc("forward":4294967295:6022)
+#loc1472 = loc("forward":4294967295:6023)
+#loc1473 = loc("forward":4294967295:6024)
+#loc1474 = loc("forward":4294967295:6026)
+#loc1475 = loc("forward":4294967295:6027)
+#loc1476 = loc("forward":4294967295:6028)
+#loc1477 = loc("forward":4294967295:6030)
+#loc1478 = loc("forward":4294967295:6032)
+#loc1479 = loc("forward":4294967295:6033)
+#loc1480 = loc("forward":4294967295:6034)
+#loc1481 = loc("forward":4294967295:6035)
+#loc1482 = loc("forward":4294967295:6036)
+#loc1483 = loc("forward":4294967295:6037)
+#loc1484 = loc("forward":4294967295:6038)
+#loc1485 = loc("forward":4294967295:6039)
+#loc1486 = loc("forward":4294967295:6040)
+#loc1487 = loc("forward":4294967295:6041)
+#loc1488 = loc("forward":4294967295:6043)
+#loc1489 = loc("forward":4294967295:6045)
+#loc1490 = loc("forward":4294967295:6046)
+#loc1491 = loc("forward":4294967295:6047)
+#loc1492 = loc("forward":4294967295:6049)
+#loc1493 = loc("forward":4294967295:6050)
+#loc1494 = loc("forward":4294967295:6051)
+#loc1495 = loc("forward":4294967295:6052)
+#loc1496 = loc("forward":4294967295:6053)
+#loc1497 = loc("forward":4294967295:6054)
+#loc1498 = loc("forward":4294967295:6055)
+#loc1499 = loc("forward":4294967295:6056)
+#loc1500 = loc("forward":4294967295:6057)
+#loc1501 = loc("forward":4294967295:6058)
+#loc1502 = loc("forward":4294967295:6060)
+#loc1503 = loc("forward":4294967295:6061)
+#loc1504 = loc("forward":4294967295:6062)
+#loc1505 = loc("forward":4294967295:6064)
+#loc1506 = loc("forward":4294967295:6065)
+#loc1507 = loc("forward":4294967295:6067)
+#loc1508 = loc("forward":4294967295:6068)
+#loc1509 = loc("forward":4294967295:6069)
+#loc1510 = loc("forward":4294967295:6070)
+#loc1511 = loc("forward":4294967295:6071)
+#loc1512 = loc("forward":4294967295:6072)
+#loc1513 = loc("forward":4294967295:6074)
+#loc1514 = loc("forward":4294967295:6075)
+#loc1515 = loc("forward":4294967295:6076)
+#loc1516 = loc("forward":4294967295:6077)
+#loc1517 = loc("forward":4294967295:6079)
+#loc1518 = loc("forward":4294967295:6080)
+#loc1519 = loc("forward":4294967295:6081)
+#loc1520 = loc("forward":4294967295:6083)
+#loc1521 = loc("forward":4294967295:6084)
+#loc1522 = loc("forward":4294967295:6086)
+#loc1523 = loc("forward":4294967295:6087)
+#loc1524 = loc("forward":4294967295:6089)
+#loc1525 = loc("forward":4294967295:6090)
+#loc1526 = loc("forward":4294967295:6091)
+#loc1527 = loc("forward":4294967295:6092)
+#loc1528 = loc("forward":4294967295:6093)
+#loc1529 = loc("forward":4294967295:6094)
+#loc1530 = loc("forward":4294967295:6096)
+#loc1531 = loc("forward":4294967295:6097)
+#loc1532 = loc("forward":4294967295:6098)
+#loc1533 = loc("forward":4294967295:6100)
+#loc1534 = loc("forward":4294967295:6101)
+#loc1535 = loc("forward":4294967295:6102)
+#loc1536 = loc("forward":4294967295:6103)
+#loc1537 = loc("forward":4294967295:6105)
+#loc1538 = loc("forward":4294967295:6106)
+#loc1539 = loc("forward":4294967295:6107)
+#loc1540 = loc("forward":4294967295:6109)
+#loc1541 = loc("forward":4294967295:6111)
+#loc1542 = loc("forward":4294967295:6112)
+#loc1543 = loc("forward":4294967295:6113)
+#loc1544 = loc("forward":4294967295:6114)
+#loc1545 = loc("forward":4294967295:6115)
+#loc1546 = loc("forward":4294967295:6116)
+#loc1547 = loc("forward":4294967295:6117)
+#loc1548 = loc("forward":4294967295:6118)
+#loc1549 = loc("forward":4294967295:6119)
+#loc1550 = loc("forward":4294967295:6121)
+#loc1551 = loc("forward":4294967295:6122)
+#loc1552 = loc("forward":4294967295:6123)
+#loc1553 = loc("forward":4294967295:6124)
+#loc1554 = loc("forward":4294967295:6126)
+#loc1555 = loc("forward":4294967295:6127)
+#loc1556 = loc("forward":4294967295:6128)
+#loc1557 = loc("forward":4294967295:6130)
+#loc1558 = loc("forward":4294967295:6132)
+#loc1559 = loc("forward":4294967295:6133)
+#loc1560 = loc("forward":4294967295:6134)
+#loc1561 = loc("forward":4294967295:6135)
+#loc1562 = loc("forward":4294967295:6136)
+#loc1563 = loc("forward":4294967295:6137)
+#loc1564 = loc("forward":4294967295:6138)
+#loc1565 = loc("forward":4294967295:6139)
+#loc1566 = loc("forward":4294967295:6140)
+#loc1567 = loc("forward":4294967295:6141)
+#loc1568 = loc("forward":4294967295:6143)
+#loc1569 = loc("forward":4294967295:6145)
+#loc1570 = loc("forward":4294967295:6146)
+#loc1571 = loc("forward":4294967295:6147)
+#loc1572 = loc("forward":4294967295:6149)
+#loc1573 = loc("forward":4294967295:6150)
+#loc1574 = loc("forward":4294967295:6151)
+#loc1575 = loc("forward":4294967295:6152)
+#loc1576 = loc("forward":4294967295:6153)
+#loc1577 = loc("forward":4294967295:6154)
+#loc1578 = loc("forward":4294967295:6155)
+#loc1579 = loc("forward":4294967295:6156)
+#loc1580 = loc("forward":4294967295:6157)
+#loc1581 = loc("forward":4294967295:6158)
+#loc1582 = loc("forward":4294967295:6160)
+#loc1583 = loc("forward":4294967295:6161)
+#loc1584 = loc("forward":4294967295:6162)
+#loc1585 = loc("forward":4294967295:6164)
+#loc1586 = loc("forward":4294967295:6165)
+#loc1587 = loc("forward":4294967295:6167)
+#loc1588 = loc("forward":4294967295:6168)
+#loc1589 = loc("forward":4294967295:6169)
+#loc1590 = loc("forward":4294967295:6170)
+#loc1591 = loc("forward":4294967295:6171)
+#loc1592 = loc("forward":4294967295:6172)
+#loc1593 = loc("forward":4294967295:6174)
+#loc1594 = loc("forward":4294967295:6175)
+#loc1595 = loc("forward":4294967295:6176)
+#loc1596 = loc("forward":4294967295:6177)
+#loc1597 = loc("forward":4294967295:6179)
+#loc1598 = loc("forward":4294967295:6180)
+#loc1599 = loc("forward":4294967295:6181)
+#loc1600 = loc("forward":4294967295:6183)
+#loc1601 = loc("forward":4294967295:6184)
+#loc1602 = loc("forward":4294967295:6186)
+#loc1603 = loc("forward":4294967295:6187)
+#loc1604 = loc("forward":4294967295:6189)
+#loc1605 = loc("forward":4294967295:6190)
+#loc1606 = loc("forward":4294967295:6191)
+#loc1607 = loc("forward":4294967295:6192)
+#loc1608 = loc("forward":4294967295:6193)
+#loc1609 = loc("forward":4294967295:6194)
+#loc1610 = loc("forward":4294967295:6196)
+#loc1611 = loc("forward":4294967295:6197)
+#loc1612 = loc("forward":4294967295:6198)
+#loc1613 = loc("forward":4294967295:6200)
+#loc1614 = loc("forward":4294967295:6201)
+#loc1615 = loc("forward":4294967295:6202)
+#loc1616 = loc("forward":4294967295:6203)
+#loc1617 = loc("forward":4294967295:6205)
+#loc1618 = loc("forward":4294967295:6206)
+#loc1619 = loc("forward":4294967295:6207)
+#loc1620 = loc("forward":4294967295:6209)
+#loc1621 = loc("forward":4294967295:6211)
+#loc1622 = loc("forward":4294967295:6212)
+#loc1623 = loc("forward":4294967295:6213)
+#loc1624 = loc("forward":4294967295:6214)
+#loc1625 = loc("forward":4294967295:6215)
+#loc1626 = loc("forward":4294967295:6216)
+#loc1627 = loc("forward":4294967295:6217)
+#loc1628 = loc("forward":4294967295:6218)
+#loc1629 = loc("forward":4294967295:6219)
+#loc1630 = loc("forward":4294967295:6221)
+#loc1631 = loc("forward":4294967295:6222)
+#loc1632 = loc("forward":4294967295:6223)
+#loc1633 = loc("forward":4294967295:6224)
+#loc1634 = loc("forward":4294967295:6226)
+#loc1635 = loc("forward":4294967295:6227)
+#loc1636 = loc("forward":4294967295:6228)
+#loc1637 = loc("forward":4294967295:6230)
+#loc1638 = loc("forward":4294967295:6232)
+#loc1639 = loc("forward":4294967295:6233)
+#loc1640 = loc("forward":4294967295:6234)
+#loc1641 = loc("forward":4294967295:6235)
+#loc1642 = loc("forward":4294967295:6236)
+#loc1643 = loc("forward":4294967295:6237)
+#loc1644 = loc("forward":4294967295:6238)
+#loc1645 = loc("forward":4294967295:6239)
+#loc1646 = loc("forward":4294967295:6240)
+#loc1647 = loc("forward":4294967295:6241)
+#loc1648 = loc("forward":4294967295:6243)
+#loc1649 = loc("forward":4294967295:6245)
+#loc1650 = loc("forward":4294967295:6246)
+#loc1651 = loc("forward":4294967295:6247)
+#loc1652 = loc("forward":4294967295:6249)
+#loc1653 = loc("forward":4294967295:6250)
+#loc1654 = loc("forward":4294967295:6251)
+#loc1655 = loc("forward":4294967295:6252)
+#loc1656 = loc("forward":4294967295:6253)
+#loc1657 = loc("forward":4294967295:6254)
+#loc1658 = loc("forward":4294967295:6255)
+#loc1659 = loc("forward":4294967295:6256)
+#loc1660 = loc("forward":4294967295:6257)
+#loc1661 = loc("forward":4294967295:6258)
+#loc1662 = loc("forward":4294967295:6260)
+#loc1663 = loc("forward":4294967295:6261)
+#loc1664 = loc("forward":4294967295:6262)
+#loc1665 = loc("forward":4294967295:6264)
+#loc1666 = loc("forward":4294967295:6265)
+#loc1667 = loc("forward":4294967295:6267)
+#loc1668 = loc("forward":4294967295:6268)
+#loc1669 = loc("forward":4294967295:6269)
+#loc1670 = loc("forward":4294967295:6270)
+#loc1671 = loc("forward":4294967295:6271)
+#loc1672 = loc("forward":4294967295:6272)
+#loc1673 = loc("forward":4294967295:6274)
+#loc1674 = loc("forward":4294967295:6275)
+#loc1675 = loc("forward":4294967295:6276)
+#loc1676 = loc("forward":4294967295:6277)
+#loc1677 = loc("forward":4294967295:6279)
+#loc1678 = loc("forward":4294967295:6280)
+#loc1679 = loc("forward":4294967295:6281)
+#loc1680 = loc("forward":4294967295:6283)
+#loc1681 = loc("forward":4294967295:6284)
+#loc1682 = loc("forward":4294967295:6286)
+#loc1683 = loc("forward":4294967295:6287)
+#loc1684 = loc("forward":4294967295:6289)
+#loc1685 = loc("forward":4294967295:6290)
+#loc1686 = loc("forward":4294967295:6291)
+#loc1687 = loc("forward":4294967295:6292)
+#loc1688 = loc("forward":4294967295:6293)
+#loc1689 = loc("forward":4294967295:6294)
+#loc1690 = loc("forward":4294967295:6296)
+#loc1691 = loc("forward":4294967295:6297)
+#loc1692 = loc("forward":4294967295:6298)
+#loc1693 = loc("forward":4294967295:6300)
+#loc1694 = loc("forward":4294967295:6301)
+#loc1695 = loc("forward":4294967295:6302)
+#loc1696 = loc("forward":4294967295:6303)
+#loc1697 = loc("forward":4294967295:6305)
+#loc1698 = loc("forward":4294967295:6306)
+#loc1699 = loc("forward":4294967295:6307)
+#loc1700 = loc("forward":4294967295:6309)
+#loc1701 = loc("forward":4294967295:6311)
+#loc1702 = loc("forward":4294967295:6312)
+#loc1703 = loc("forward":4294967295:6313)
+#loc1704 = loc("forward":4294967295:6314)
+#loc1705 = loc("forward":4294967295:6315)
+#loc1706 = loc("forward":4294967295:6316)
+#loc1707 = loc("forward":4294967295:6317)
+#loc1708 = loc("forward":4294967295:6318)
+#loc1709 = loc("forward":4294967295:6319)
+#loc1710 = loc("forward":4294967295:6321)
+#loc1711 = loc("forward":4294967295:6322)
+#loc1712 = loc("forward":4294967295:6323)
+#loc1713 = loc("forward":4294967295:6324)
+#loc1714 = loc("forward":4294967295:6326)
+#loc1715 = loc("forward":4294967295:6327)
+#loc1716 = loc("forward":4294967295:6328)
+#loc1717 = loc("forward":4294967295:6330)
+#loc1718 = loc("forward":4294967295:6332)
+#loc1719 = loc("forward":4294967295:6333)
+#loc1720 = loc("forward":4294967295:6334)
+#loc1721 = loc("forward":4294967295:6335)
+#loc1722 = loc("forward":4294967295:6336)
+#loc1723 = loc("forward":4294967295:6337)
+#loc1724 = loc("forward":4294967295:6338)
+#loc1725 = loc("forward":4294967295:6339)
+#loc1726 = loc("forward":4294967295:6340)
+#loc1727 = loc("forward":4294967295:6341)
+#loc1728 = loc("forward":4294967295:6343)
+#loc1729 = loc("forward":4294967295:6345)
+#loc1730 = loc("forward":4294967295:6346)
+#loc1731 = loc("forward":4294967295:6347)
+#loc1732 = loc("forward":4294967295:6349)
+#loc1733 = loc("forward":4294967295:6350)
+#loc1734 = loc("forward":4294967295:6351)
+#loc1735 = loc("forward":4294967295:6352)
+#loc1736 = loc("forward":4294967295:6353)
+#loc1737 = loc("forward":4294967295:6354)
+#loc1738 = loc("forward":4294967295:6355)
+#loc1739 = loc("forward":4294967295:6356)
+#loc1740 = loc("forward":4294967295:6357)
+#loc1741 = loc("forward":4294967295:6358)
+#loc1742 = loc("forward":4294967295:6360)
+#loc1743 = loc("forward":4294967295:6361)
+#loc1744 = loc("forward":4294967295:6362)
+#loc1745 = loc("forward":4294967295:6364)
+#loc1746 = loc("forward":4294967295:6365)
+#loc1747 = loc("forward":4294967295:6367)
+#loc1748 = loc("forward":4294967295:6368)
+#loc1749 = loc("forward":4294967295:6369)
+#loc1750 = loc("forward":4294967295:6370)
+#loc1751 = loc("forward":4294967295:6371)
+#loc1752 = loc("forward":4294967295:6372)
+#loc1753 = loc("forward":4294967295:6374)
+#loc1754 = loc("forward":4294967295:6375)
+#loc1755 = loc("forward":4294967295:6376)
+#loc1756 = loc("forward":4294967295:6377)
+#loc1757 = loc("forward":4294967295:6379)
+#loc1758 = loc("forward":4294967295:6380)
+#loc1759 = loc("forward":4294967295:6381)
+#loc1760 = loc("forward":4294967295:6383)
+#loc1761 = loc("forward":4294967295:6384)
+#loc1762 = loc("forward":4294967295:6386)
+#loc1763 = loc("forward":4294967295:6387)
+#loc1764 = loc("forward":4294967295:6389)
+#loc1765 = loc("forward":4294967295:6390)
+#loc1766 = loc("forward":4294967295:6391)
+#loc1767 = loc("forward":4294967295:6392)
+#loc1768 = loc("forward":4294967295:6393)
+#loc1769 = loc("forward":4294967295:6394)
+#loc1770 = loc("forward":4294967295:6396)
+#loc1771 = loc("forward":4294967295:6397)
+#loc1772 = loc("forward":4294967295:6398)
+#loc1773 = loc("forward":4294967295:6400)
+#loc1774 = loc("forward":4294967295:6401)
+#loc1775 = loc("forward":4294967295:6402)
+#loc1776 = loc("forward":4294967295:6403)
+#loc1777 = loc("forward":4294967295:6405)
+#loc1778 = loc("forward":4294967295:6406)
+#loc1779 = loc("forward":4294967295:6407)
+#loc1780 = loc("forward":4294967295:6409)
+#loc1781 = loc("forward":4294967295:6411)
+#loc1782 = loc("forward":4294967295:6412)
+#loc1783 = loc("forward":4294967295:6413)
+#loc1784 = loc("forward":4294967295:6414)
+#loc1785 = loc("forward":4294967295:6415)
+#loc1786 = loc("forward":4294967295:6416)
+#loc1787 = loc("forward":4294967295:6417)
+#loc1788 = loc("forward":4294967295:6418)
+#loc1789 = loc("forward":4294967295:6419)
+#loc1790 = loc("forward":4294967295:6421)
+#loc1791 = loc("forward":4294967295:6422)
+#loc1792 = loc("forward":4294967295:6423)
+#loc1793 = loc("forward":4294967295:6424)
+#loc1794 = loc("forward":4294967295:6426)
+#loc1795 = loc("forward":4294967295:6427)
+#loc1796 = loc("forward":4294967295:6428)
+#loc1797 = loc("forward":4294967295:6430)
+#loc1798 = loc("forward":4294967295:6432)
+#loc1799 = loc("forward":4294967295:6433)
+#loc1800 = loc("forward":4294967295:6434)
+#loc1801 = loc("forward":4294967295:6435)
+#loc1802 = loc("forward":4294967295:6436)
+#loc1803 = loc("forward":4294967295:6437)
+#loc1804 = loc("forward":4294967295:6438)
+#loc1805 = loc("forward":4294967295:6439)
+#loc1806 = loc("forward":4294967295:6440)
+#loc1807 = loc("forward":4294967295:6441)
+#loc1808 = loc("forward":4294967295:6443)
+#loc1809 = loc("forward":4294967295:6445)
+#loc1810 = loc("forward":4294967295:6446)
+#loc1811 = loc("forward":4294967295:6447)
+#loc1812 = loc("forward":4294967295:6449)
+#loc1813 = loc("forward":4294967295:6450)
+#loc1814 = loc("forward":4294967295:6451)
+#loc1815 = loc("forward":4294967295:6452)
+#loc1816 = loc("forward":4294967295:6453)
+#loc1817 = loc("forward":4294967295:6454)
+#loc1818 = loc("forward":4294967295:6455)
+#loc1819 = loc("forward":4294967295:6456)
+#loc1820 = loc("forward":4294967295:6457)
+#loc1821 = loc("forward":4294967295:6458)
+#loc1822 = loc("forward":4294967295:6460)
+#loc1823 = loc("forward":4294967295:6461)
+#loc1824 = loc("forward":4294967295:6462)
+#loc1825 = loc("forward":4294967295:6464)
+#loc1826 = loc("forward":4294967295:6465)
+#loc1827 = loc("forward":4294967295:6467)
+#loc1828 = loc("forward":4294967295:6468)
+#loc1829 = loc("forward":4294967295:6469)
+#loc1830 = loc("forward":4294967295:6470)
+#loc1831 = loc("forward":4294967295:6471)
+#loc1832 = loc("forward":4294967295:6472)
+#loc1833 = loc("forward":4294967295:6474)
+#loc1834 = loc("forward":4294967295:6475)
+#loc1835 = loc("forward":4294967295:6476)
+#loc1836 = loc("forward":4294967295:6477)
+#loc1837 = loc("forward":4294967295:6479)
+#loc1838 = loc("forward":4294967295:6480)
+#loc1839 = loc("forward":4294967295:6481)
+#loc1840 = loc("forward":4294967295:6483)
+#loc1841 = loc("forward":4294967295:6484)
+#loc1842 = loc("forward":4294967295:6486)
+#loc1843 = loc("forward":4294967295:6487)
+#loc1844 = loc("forward":4294967295:6489)
+#loc1845 = loc("forward":4294967295:6490)
+#loc1846 = loc("forward":4294967295:6491)
+#loc1847 = loc("forward":4294967295:6492)
+#loc1848 = loc("forward":4294967295:6493)
+#loc1849 = loc("forward":4294967295:6494)
+#loc1850 = loc("forward":4294967295:6496)
+#loc1851 = loc("forward":4294967295:6497)
+#loc1852 = loc("forward":4294967295:6498)
+#loc1853 = loc("forward":4294967295:6500)
+#loc1854 = loc("forward":4294967295:6501)
+#loc1855 = loc("forward":4294967295:6502)
+#loc1856 = loc("forward":4294967295:6503)
+#loc1857 = loc("forward":4294967295:6505)
+#loc1858 = loc("forward":4294967295:6506)
+#loc1859 = loc("forward":4294967295:6507)
+#loc1860 = loc("forward":4294967295:6509)
+#loc1861 = loc("forward":4294967295:6511)
+#loc1862 = loc("forward":4294967295:6512)
+#loc1863 = loc("forward":4294967295:6513)
+#loc1864 = loc("forward":4294967295:6514)
+#loc1865 = loc("forward":4294967295:6515)
+#loc1866 = loc("forward":4294967295:6516)
+#loc1867 = loc("forward":4294967295:6517)
+#loc1868 = loc("forward":4294967295:6518)
+#loc1869 = loc("forward":4294967295:6519)
+#loc1870 = loc("forward":4294967295:6521)
+#loc1871 = loc("forward":4294967295:6522)
+#loc1872 = loc("forward":4294967295:6523)
+#loc1873 = loc("forward":4294967295:6524)
+#loc1874 = loc("forward":4294967295:6526)
+#loc1875 = loc("forward":4294967295:6527)
+#loc1876 = loc("forward":4294967295:6528)
+#loc1877 = loc("forward":4294967295:6530)
+#loc1878 = loc("forward":4294967295:6532)
+#loc1879 = loc("forward":4294967295:6533)
+#loc1880 = loc("forward":4294967295:6534)
+#loc1881 = loc("forward":4294967295:6535)
+#loc1882 = loc("forward":4294967295:6536)
+#loc1883 = loc("forward":4294967295:6537)
+#loc1884 = loc("forward":4294967295:6538)
+#loc1885 = loc("forward":4294967295:6539)
+#loc1886 = loc("forward":4294967295:6540)
+#loc1887 = loc("forward":4294967295:6541)
+#loc1888 = loc("forward":4294967295:6543)
+#loc1889 = loc("forward":4294967295:6545)
+#loc1890 = loc("forward":4294967295:6546)
+#loc1891 = loc("forward":4294967295:6547)
+#loc1892 = loc("forward":4294967295:6549)
+#loc1893 = loc("forward":4294967295:6550)
+#loc1894 = loc("forward":4294967295:6551)
+#loc1895 = loc("forward":4294967295:6552)
+#loc1896 = loc("forward":4294967295:6553)
+#loc1897 = loc("forward":4294967295:6554)
+#loc1898 = loc("forward":4294967295:6555)
+#loc1899 = loc("forward":4294967295:6556)
+#loc1900 = loc("forward":4294967295:6557)
+#loc1901 = loc("forward":4294967295:6558)
+#loc1902 = loc("forward":4294967295:6560)
+#loc1903 = loc("forward":4294967295:6561)
+#loc1904 = loc("forward":4294967295:6562)
+#loc1905 = loc("forward":4294967295:6564)
+#loc1906 = loc("forward":4294967295:6565)
+#loc1907 = loc("forward":4294967295:6567)
+#loc1908 = loc("forward":4294967295:6568)
+#loc1909 = loc("forward":4294967295:6569)
+#loc1910 = loc("forward":4294967295:6570)
+#loc1911 = loc("forward":4294967295:6571)
+#loc1912 = loc("forward":4294967295:6572)
+#loc1913 = loc("forward":4294967295:6574)
+#loc1914 = loc("forward":4294967295:6575)
+#loc1915 = loc("forward":4294967295:6576)
+#loc1916 = loc("forward":4294967295:6577)
+#loc1917 = loc("forward":4294967295:6579)
+#loc1918 = loc("forward":4294967295:6580)
+#loc1919 = loc("forward":4294967295:6581)
+#loc1920 = loc("forward":4294967295:6583)
+#loc1921 = loc("forward":4294967295:6584)
+#loc1922 = loc("forward":4294967295:6586)
+#loc1923 = loc("forward":4294967295:6587)
+#loc1924 = loc("forward":4294967295:6589)
+#loc1925 = loc("forward":4294967295:6590)
+#loc1926 = loc("forward":4294967295:6591)
+#loc1927 = loc("forward":4294967295:6592)
+#loc1928 = loc("forward":4294967295:6593)
+#loc1929 = loc("forward":4294967295:6594)
+#loc1930 = loc("forward":4294967295:6596)
+#loc1931 = loc("forward":4294967295:6597)
+#loc1932 = loc("forward":4294967295:6598)
+#loc1933 = loc("forward":4294967295:6600)
+#loc1934 = loc("forward":4294967295:6601)
+#loc1935 = loc("forward":4294967295:6602)
+#loc1936 = loc("forward":4294967295:6603)
+#loc1937 = loc("forward":4294967295:6605)
+#loc1938 = loc("forward":4294967295:6606)
+#loc1939 = loc("forward":4294967295:6607)
+#loc1940 = loc("forward":4294967295:6609)
+#loc1941 = loc("forward":4294967295:6611)
+#loc1942 = loc("forward":4294967295:6612)
+#loc1943 = loc("forward":4294967295:6613)
+#loc1944 = loc("forward":4294967295:6614)
+#loc1945 = loc("forward":4294967295:6615)
+#loc1946 = loc("forward":4294967295:6616)
+#loc1947 = loc("forward":4294967295:6617)
+#loc1948 = loc("forward":4294967295:6618)
+#loc1949 = loc("forward":4294967295:6619)
+#loc1950 = loc("forward":4294967295:6621)
+#loc1951 = loc("forward":4294967295:6622)
+#loc1952 = loc("forward":4294967295:6623)
+#loc1953 = loc("forward":4294967295:6624)
+#loc1954 = loc("forward":4294967295:6626)
+#loc1955 = loc("forward":4294967295:6627)
+#loc1956 = loc("forward":4294967295:6628)
+#loc1957 = loc("forward":4294967295:6630)
+#loc1958 = loc("forward":4294967295:6632)
+#loc1959 = loc("forward":4294967295:6633)
+#loc1960 = loc("forward":4294967295:6634)
+#loc1961 = loc("forward":4294967295:6635)
+#loc1962 = loc("forward":4294967295:6636)
+#loc1963 = loc("forward":4294967295:6637)
+#loc1964 = loc("forward":4294967295:6638)
+#loc1965 = loc("forward":4294967295:6639)
+#loc1966 = loc("forward":4294967295:6640)
+#loc1967 = loc("forward":4294967295:6641)
+#loc1968 = loc("forward":4294967295:6643)
+#loc1969 = loc("forward":4294967295:6645)
+#loc1970 = loc("forward":4294967295:6646)
+#loc1971 = loc("forward":4294967295:6647)
+#loc1972 = loc("forward":4294967295:6649)
+#loc1973 = loc("forward":4294967295:6650)
+#loc1974 = loc("forward":4294967295:6651)
+#loc1975 = loc("forward":4294967295:6652)
+#loc1976 = loc("forward":4294967295:6653)
+#loc1977 = loc("forward":4294967295:6654)
+#loc1978 = loc("forward":4294967295:6655)
+#loc1979 = loc("forward":4294967295:6656)
+#loc1980 = loc("forward":4294967295:6657)
+#loc1981 = loc("forward":4294967295:6658)
+#loc1982 = loc("forward":4294967295:6660)
+#loc1983 = loc("forward":4294967295:6661)
+#loc1984 = loc("forward":4294967295:6662)
+#loc1985 = loc("forward":4294967295:6664)
+#loc1986 = loc("forward":4294967295:6665)
+#loc1987 = loc("forward":4294967295:6667)
+#loc1988 = loc("forward":4294967295:6668)
+#loc1989 = loc("forward":4294967295:6669)
+#loc1990 = loc("forward":4294967295:6670)
+#loc1991 = loc("forward":4294967295:6671)
+#loc1992 = loc("forward":4294967295:6672)
+#loc1993 = loc("forward":4294967295:6674)
+#loc1994 = loc("forward":4294967295:6675)
+#loc1995 = loc("forward":4294967295:6676)
+#loc1996 = loc("forward":4294967295:6677)
+#loc1997 = loc("forward":4294967295:6679)
+#loc1998 = loc("forward":4294967295:6680)
+#loc1999 = loc("forward":4294967295:6681)
+#loc2000 = loc("forward":4294967295:6683)
+#loc2001 = loc("forward":4294967295:6684)
+#loc2002 = loc("forward":4294967295:6686)
+#loc2003 = loc("forward":4294967295:6687)
+#loc2004 = loc("forward":4294967295:6689)
+#loc2005 = loc("forward":4294967295:6690)
+#loc2006 = loc("forward":4294967295:6691)
+#loc2007 = loc("forward":4294967295:6692)
+#loc2008 = loc("forward":4294967295:6693)
+#loc2009 = loc("forward":4294967295:6694)
+#loc2010 = loc("forward":4294967295:6696)
+#loc2011 = loc("forward":4294967295:6697)
+#loc2012 = loc("forward":4294967295:6698)
+#loc2013 = loc("forward":4294967295:6700)
+#loc2014 = loc("forward":4294967295:6701)
+#loc2015 = loc("forward":4294967295:6702)
+#loc2016 = loc("forward":4294967295:6703)
+#loc2017 = loc("forward":4294967295:6705)
+#loc2018 = loc("forward":4294967295:6706)
+#loc2019 = loc("forward":4294967295:6707)
+#loc2020 = loc("forward":4294967295:6709)
+#loc2021 = loc("forward":4294967295:6711)
+#loc2022 = loc("forward":4294967295:6712)
+#loc2023 = loc("forward":4294967295:6713)
+#loc2024 = loc("forward":4294967295:6714)
+#loc2025 = loc("forward":4294967295:6715)
+#loc2026 = loc("forward":4294967295:6716)
+#loc2027 = loc("forward":4294967295:6717)
+#loc2028 = loc("forward":4294967295:6718)
+#loc2029 = loc("forward":4294967295:6719)
+#loc2030 = loc("forward":4294967295:6721)
+#loc2031 = loc("forward":4294967295:6722)
+#loc2032 = loc("forward":4294967295:6723)
+#loc2033 = loc("forward":4294967295:6724)
+#loc2034 = loc("forward":4294967295:6726)
+#loc2035 = loc("forward":4294967295:6727)
+#loc2036 = loc("forward":4294967295:6728)
+#loc2037 = loc("forward":4294967295:6730)
+#loc2038 = loc("forward":4294967295:6732)
+#loc2039 = loc("forward":4294967295:6733)
+#loc2040 = loc("forward":4294967295:6734)
+#loc2041 = loc("forward":4294967295:6735)
+#loc2042 = loc("forward":4294967295:6736)
+#loc2043 = loc("forward":4294967295:6737)
+#loc2044 = loc("forward":4294967295:6738)
+#loc2045 = loc("forward":4294967295:6739)
+#loc2046 = loc("forward":4294967295:6740)
+#loc2047 = loc("forward":4294967295:6741)
+#loc2048 = loc("forward":4294967295:6743)
+#loc2049 = loc("forward":4294967295:6745)
+#loc2050 = loc("forward":4294967295:6746)
+#loc2051 = loc("forward":4294967295:6747)
+#loc2052 = loc("forward":4294967295:6749)
+#loc2053 = loc("forward":4294967295:6750)
+#loc2054 = loc("forward":4294967295:6751)
+#loc2055 = loc("forward":4294967295:6752)
+#loc2056 = loc("forward":4294967295:6753)
+#loc2057 = loc("forward":4294967295:6754)
+#loc2058 = loc("forward":4294967295:6755)
+#loc2059 = loc("forward":4294967295:6756)
+#loc2060 = loc("forward":4294967295:6757)
+#loc2061 = loc("forward":4294967295:6758)
+#loc2062 = loc("forward":4294967295:6760)
+#loc2063 = loc("forward":4294967295:6761)
+#loc2064 = loc("forward":4294967295:6762)
+#loc2065 = loc("forward":4294967295:6764)
+#loc2066 = loc("forward":4294967295:6765)
+#loc2067 = loc("forward":4294967295:6767)
+#loc2068 = loc("forward":4294967295:6768)
+#loc2069 = loc("forward":4294967295:6769)
+#loc2070 = loc("forward":4294967295:6770)
+#loc2071 = loc("forward":4294967295:6771)
+#loc2072 = loc("forward":4294967295:6772)
+#loc2073 = loc("forward":4294967295:6774)
+#loc2074 = loc("forward":4294967295:6775)
+#loc2075 = loc("forward":4294967295:6776)
+#loc2076 = loc("forward":4294967295:6777)
+#loc2077 = loc("forward":4294967295:6779)
+#loc2078 = loc("forward":4294967295:6780)
+#loc2079 = loc("forward":4294967295:6781)
+#loc2080 = loc("forward":4294967295:6783)
+#loc2081 = loc("forward":4294967295:6784)
+#loc2082 = loc("forward":4294967295:6785)
+#loc2083 = loc("forward":4294967295:6786)
+#loc2084 = loc("forward":4294967295:6788)
+#loc2085 = loc("forward":4294967295:6789)
+#loc2086 = loc("forward":4294967295:6790)
+#loc2087 = loc("forward":4294967295:6791)
+#loc2088 = loc("forward":4294967295:6792)
+#loc2089 = loc("forward":4294967295:6794)
+#loc2090 = loc(unknown)
+#loc2091 = loc("embedding_1"(#loc1))
+#loc2092 = loc("multiply_2"(#loc2))
+#loc2093 = loc("reduce_avg_3"(#loc3))
+#loc2094 = loc("add_4"(#loc4))
+#loc2095 = loc("sqrt_5"(#loc5))
+#loc2096 = loc("reciprocal_6"(#loc6))
+#loc2097 = loc("multiply_7"(#loc7))
+#loc2098 = loc("multiply_8"(#loc8))
+#loc2099 = loc("reshape_9.dc.squeeze.0"(#loc9))
+#loc2100 = loc("matmul_11"(#loc10))
+#loc2101 = loc("reshape_12"(#loc11))
+#loc2102 = loc("transpose_13"(#loc12))
+#loc2103 = loc("concatenate_20"(#loc13))
+#loc2104 = loc("cosine_21"(#loc14))
+#loc2105 = loc("unsqueeze_22"(#loc15))
+#loc2106 = loc("multiply_23"(#loc16))
+#loc2107 = loc("index_24.dc.transpose.0"(#loc17))
+#loc2108 = loc("index_24.dc.matmul.2"(#loc18))
+#loc2109 = loc("index_24.dc.transpose.3"(#loc19))
+#loc2110 = loc("multiply_25"(#loc20))
+#loc2111 = loc("index_26.dc.transpose.0"(#loc21))
+#loc2112 = loc("index_26.dc.matmul.2"(#loc22))
+#loc2113 = loc("index_26.dc.transpose.3"(#loc23))
+#loc2114 = loc("concatenate_27"(#loc24))
+#loc2115 = loc("sine_28"(#loc25))
+#loc2116 = loc("unsqueeze_29"(#loc26))
+#loc2117 = loc("multiply_30"(#loc27))
+#loc2118 = loc("add_31"(#loc28))
+#loc2119 = loc("reshape_32.dc.squeeze.0"(#loc29))
+#loc2120 = loc("matmul_34"(#loc30))
+#loc2121 = loc("reshape_35"(#loc31))
+#loc2122 = loc("transpose_36"(#loc32))
+#loc2123 = loc("multiply_37"(#loc33))
+#loc2124 = loc("index_38.dc.transpose.0"(#loc34))
+#loc2125 = loc("index_38.dc.matmul.2"(#loc35))
+#loc2126 = loc("index_38.dc.transpose.3"(#loc36))
+#loc2127 = loc("multiply_39"(#loc37))
+#loc2128 = loc("index_40.dc.transpose.0"(#loc38))
+#loc2129 = loc("index_40.dc.matmul.2"(#loc39))
+#loc2130 = loc("index_40.dc.transpose.3"(#loc40))
+#loc2131 = loc("concatenate_41"(#loc41))
+#loc2132 = loc("multiply_42"(#loc42))
+#loc2133 = loc("add_43"(#loc43))
+#loc2134 = loc("reshape_44.dc.squeeze.0"(#loc44))
+#loc2135 = loc("transpose_45"(#loc45))
+#loc2136 = loc("matmul_46"(#loc46))
+#loc2137 = loc("reshape_47.dc.unsqueeze.0"(#loc47))
+#loc2138 = loc("multiply_48"(#loc48))
+#loc2139 = loc("add_49"(#loc49))
+#loc2140 = loc("softmax_50"(#loc50))
+#loc2141 = loc("reshape_52.dc.squeeze.0"(#loc51))
+#loc2142 = loc("matmul_54"(#loc52))
+#loc2143 = loc("reshape_55"(#loc53))
+#loc2144 = loc("transpose_56"(#loc54))
+#loc2145 = loc("transpose_57"(#loc55))
+#loc2146 = loc("reshape_58.dc.squeeze.0"(#loc56))
+#loc2147 = loc("transpose_59"(#loc57))
+#loc2148 = loc("matmul_60"(#loc58))
+#loc2149 = loc("reshape_61.dc.unsqueeze.0"(#loc59))
+#loc2150 = loc("transpose_62"(#loc60))
+#loc2151 = loc("reshape_63"(#loc61))
+#loc2152 = loc("matmul_65"(#loc62))
+#loc2153 = loc("reshape_66.dc.unsqueeze.0"(#loc63))
+#loc2154 = loc("add_67"(#loc64))
+#loc2155 = loc("multiply_68"(#loc65))
+#loc2156 = loc("reduce_avg_69"(#loc66))
+#loc2157 = loc("add_70"(#loc67))
+#loc2158 = loc("sqrt_71"(#loc68))
+#loc2159 = loc("reciprocal_72"(#loc69))
+#loc2160 = loc("multiply_73"(#loc70))
+#loc2161 = loc("multiply_74"(#loc71))
+#loc2162 = loc("reshape_75.dc.squeeze.0"(#loc72))
+#loc2163 = loc("matmul_77"(#loc73))
+#loc2164 = loc("reshape_78.dc.unsqueeze.0"(#loc74))
+#loc2165 = loc("sigmoid_79"(#loc75))
+#loc2166 = loc("multiply_80"(#loc76))
+#loc2167 = loc("matmul_82"(#loc77))
+#loc2168 = loc("reshape_83.dc.unsqueeze.0"(#loc78))
+#loc2169 = loc("multiply_84"(#loc79))
+#loc2170 = loc("matmul_86"(#loc80))
+#loc2171 = loc("add_87"(#loc81))
+#loc2172 = loc("multiply_88"(#loc82))
+#loc2173 = loc("reduce_avg_89"(#loc83))
+#loc2174 = loc("add_90"(#loc84))
+#loc2175 = loc("sqrt_91"(#loc85))
+#loc2176 = loc("reciprocal_92"(#loc86))
+#loc2177 = loc("multiply_93"(#loc87))
+#loc2178 = loc("multiply_94"(#loc88))
+#loc2179 = loc("reshape_95.dc.squeeze.0"(#loc89))
+#loc2180 = loc("matmul_97"(#loc90))
+#loc2181 = loc("reshape_98"(#loc91))
+#loc2182 = loc("transpose_99"(#loc92))
+#loc2183 = loc("concatenate_106"(#loc93))
+#loc2184 = loc("cosine_107"(#loc94))
+#loc2185 = loc("unsqueeze_108"(#loc95))
+#loc2186 = loc("multiply_109"(#loc96))
+#loc2187 = loc("index_110.dc.transpose.0"(#loc97))
+#loc2188 = loc("index_110.dc.matmul.2"(#loc98))
+#loc2189 = loc("index_110.dc.transpose.3"(#loc99))
+#loc2190 = loc("multiply_111"(#loc100))
+#loc2191 = loc("index_112.dc.transpose.0"(#loc101))
+#loc2192 = loc("index_112.dc.matmul.2"(#loc102))
+#loc2193 = loc("index_112.dc.transpose.3"(#loc103))
+#loc2194 = loc("concatenate_113"(#loc104))
+#loc2195 = loc("sine_114"(#loc105))
+#loc2196 = loc("unsqueeze_115"(#loc106))
+#loc2197 = loc("multiply_116"(#loc107))
+#loc2198 = loc("add_117"(#loc108))
+#loc2199 = loc("reshape_118.dc.squeeze.0"(#loc109))
+#loc2200 = loc("matmul_120"(#loc110))
+#loc2201 = loc("reshape_121"(#loc111))
+#loc2202 = loc("transpose_122"(#loc112))
+#loc2203 = loc("multiply_123"(#loc113))
+#loc2204 = loc("index_124.dc.transpose.0"(#loc114))
+#loc2205 = loc("index_124.dc.matmul.2"(#loc115))
+#loc2206 = loc("index_124.dc.transpose.3"(#loc116))
+#loc2207 = loc("multiply_125"(#loc117))
+#loc2208 = loc("index_126.dc.transpose.0"(#loc118))
+#loc2209 = loc("index_126.dc.matmul.2"(#loc119))
+#loc2210 = loc("index_126.dc.transpose.3"(#loc120))
+#loc2211 = loc("concatenate_127"(#loc121))
+#loc2212 = loc("multiply_128"(#loc122))
+#loc2213 = loc("add_129"(#loc123))
+#loc2214 = loc("reshape_130.dc.squeeze.0"(#loc124))
+#loc2215 = loc("transpose_131"(#loc125))
+#loc2216 = loc("matmul_132"(#loc126))
+#loc2217 = loc("reshape_133.dc.unsqueeze.0"(#loc127))
+#loc2218 = loc("multiply_134"(#loc128))
+#loc2219 = loc("add_135"(#loc129))
+#loc2220 = loc("softmax_136"(#loc130))
+#loc2221 = loc("reshape_138.dc.squeeze.0"(#loc131))
+#loc2222 = loc("matmul_140"(#loc132))
+#loc2223 = loc("reshape_141"(#loc133))
+#loc2224 = loc("transpose_142"(#loc134))
+#loc2225 = loc("transpose_143"(#loc135))
+#loc2226 = loc("reshape_144.dc.squeeze.0"(#loc136))
+#loc2227 = loc("transpose_145"(#loc137))
+#loc2228 = loc("matmul_146"(#loc138))
+#loc2229 = loc("reshape_147.dc.unsqueeze.0"(#loc139))
+#loc2230 = loc("transpose_148"(#loc140))
+#loc2231 = loc("reshape_149"(#loc141))
+#loc2232 = loc("matmul_151"(#loc142))
+#loc2233 = loc("reshape_152.dc.unsqueeze.0"(#loc143))
+#loc2234 = loc("add_153"(#loc144))
+#loc2235 = loc("multiply_154"(#loc145))
+#loc2236 = loc("reduce_avg_155"(#loc146))
+#loc2237 = loc("add_156"(#loc147))
+#loc2238 = loc("sqrt_157"(#loc148))
+#loc2239 = loc("reciprocal_158"(#loc149))
+#loc2240 = loc("multiply_159"(#loc150))
+#loc2241 = loc("multiply_160"(#loc151))
+#loc2242 = loc("reshape_161.dc.squeeze.0"(#loc152))
+#loc2243 = loc("matmul_163"(#loc153))
+#loc2244 = loc("reshape_164.dc.unsqueeze.0"(#loc154))
+#loc2245 = loc("sigmoid_165"(#loc155))
+#loc2246 = loc("multiply_166"(#loc156))
+#loc2247 = loc("matmul_168"(#loc157))
+#loc2248 = loc("reshape_169.dc.unsqueeze.0"(#loc158))
+#loc2249 = loc("multiply_170"(#loc159))
+#loc2250 = loc("matmul_172"(#loc160))
+#loc2251 = loc("add_173"(#loc161))
+#loc2252 = loc("multiply_174"(#loc162))
+#loc2253 = loc("reduce_avg_175"(#loc163))
+#loc2254 = loc("add_176"(#loc164))
+#loc2255 = loc("sqrt_177"(#loc165))
+#loc2256 = loc("reciprocal_178"(#loc166))
+#loc2257 = loc("multiply_179"(#loc167))
+#loc2258 = loc("multiply_180"(#loc168))
+#loc2259 = loc("reshape_181.dc.squeeze.0"(#loc169))
+#loc2260 = loc("matmul_183"(#loc170))
+#loc2261 = loc("reshape_184"(#loc171))
+#loc2262 = loc("transpose_185"(#loc172))
+#loc2263 = loc("concatenate_192"(#loc173))
+#loc2264 = loc("cosine_193"(#loc174))
+#loc2265 = loc("unsqueeze_194"(#loc175))
+#loc2266 = loc("multiply_195"(#loc176))
+#loc2267 = loc("index_196.dc.transpose.0"(#loc177))
+#loc2268 = loc("index_196.dc.matmul.2"(#loc178))
+#loc2269 = loc("index_196.dc.transpose.3"(#loc179))
+#loc2270 = loc("multiply_197"(#loc180))
+#loc2271 = loc("index_198.dc.transpose.0"(#loc181))
+#loc2272 = loc("index_198.dc.matmul.2"(#loc182))
+#loc2273 = loc("index_198.dc.transpose.3"(#loc183))
+#loc2274 = loc("concatenate_199"(#loc184))
+#loc2275 = loc("sine_200"(#loc185))
+#loc2276 = loc("unsqueeze_201"(#loc186))
+#loc2277 = loc("multiply_202"(#loc187))
+#loc2278 = loc("add_203"(#loc188))
+#loc2279 = loc("reshape_204.dc.squeeze.0"(#loc189))
+#loc2280 = loc("matmul_206"(#loc190))
+#loc2281 = loc("reshape_207"(#loc191))
+#loc2282 = loc("transpose_208"(#loc192))
+#loc2283 = loc("multiply_209"(#loc193))
+#loc2284 = loc("index_210.dc.transpose.0"(#loc194))
+#loc2285 = loc("index_210.dc.matmul.2"(#loc195))
+#loc2286 = loc("index_210.dc.transpose.3"(#loc196))
+#loc2287 = loc("multiply_211"(#loc197))
+#loc2288 = loc("index_212.dc.transpose.0"(#loc198))
+#loc2289 = loc("index_212.dc.matmul.2"(#loc199))
+#loc2290 = loc("index_212.dc.transpose.3"(#loc200))
+#loc2291 = loc("concatenate_213"(#loc201))
+#loc2292 = loc("multiply_214"(#loc202))
+#loc2293 = loc("add_215"(#loc203))
+#loc2294 = loc("reshape_216.dc.squeeze.0"(#loc204))
+#loc2295 = loc("transpose_217"(#loc205))
+#loc2296 = loc("matmul_218"(#loc206))
+#loc2297 = loc("reshape_219.dc.unsqueeze.0"(#loc207))
+#loc2298 = loc("multiply_220"(#loc208))
+#loc2299 = loc("add_221"(#loc209))
+#loc2300 = loc("softmax_222"(#loc210))
+#loc2301 = loc("reshape_224.dc.squeeze.0"(#loc211))
+#loc2302 = loc("matmul_226"(#loc212))
+#loc2303 = loc("reshape_227"(#loc213))
+#loc2304 = loc("transpose_228"(#loc214))
+#loc2305 = loc("transpose_229"(#loc215))
+#loc2306 = loc("reshape_230.dc.squeeze.0"(#loc216))
+#loc2307 = loc("transpose_231"(#loc217))
+#loc2308 = loc("matmul_232"(#loc218))
+#loc2309 = loc("reshape_233.dc.unsqueeze.0"(#loc219))
+#loc2310 = loc("transpose_234"(#loc220))
+#loc2311 = loc("reshape_235"(#loc221))
+#loc2312 = loc("matmul_237"(#loc222))
+#loc2313 = loc("reshape_238.dc.unsqueeze.0"(#loc223))
+#loc2314 = loc("add_239"(#loc224))
+#loc2315 = loc("multiply_240"(#loc225))
+#loc2316 = loc("reduce_avg_241"(#loc226))
+#loc2317 = loc("add_242"(#loc227))
+#loc2318 = loc("sqrt_243"(#loc228))
+#loc2319 = loc("reciprocal_244"(#loc229))
+#loc2320 = loc("multiply_245"(#loc230))
+#loc2321 = loc("multiply_246"(#loc231))
+#loc2322 = loc("reshape_247.dc.squeeze.0"(#loc232))
+#loc2323 = loc("matmul_249"(#loc233))
+#loc2324 = loc("reshape_250.dc.unsqueeze.0"(#loc234))
+#loc2325 = loc("sigmoid_251"(#loc235))
+#loc2326 = loc("multiply_252"(#loc236))
+#loc2327 = loc("matmul_254"(#loc237))
+#loc2328 = loc("reshape_255.dc.unsqueeze.0"(#loc238))
+#loc2329 = loc("multiply_256"(#loc239))
+#loc2330 = loc("matmul_258"(#loc240))
+#loc2331 = loc("add_259"(#loc241))
+#loc2332 = loc("multiply_260"(#loc242))
+#loc2333 = loc("reduce_avg_261"(#loc243))
+#loc2334 = loc("add_262"(#loc244))
+#loc2335 = loc("sqrt_263"(#loc245))
+#loc2336 = loc("reciprocal_264"(#loc246))
+#loc2337 = loc("multiply_265"(#loc247))
+#loc2338 = loc("multiply_266"(#loc248))
+#loc2339 = loc("reshape_267.dc.squeeze.0"(#loc249))
+#loc2340 = loc("matmul_269"(#loc250))
+#loc2341 = loc("reshape_270"(#loc251))
+#loc2342 = loc("transpose_271"(#loc252))
+#loc2343 = loc("concatenate_278"(#loc253))
+#loc2344 = loc("cosine_279"(#loc254))
+#loc2345 = loc("unsqueeze_280"(#loc255))
+#loc2346 = loc("multiply_281"(#loc256))
+#loc2347 = loc("index_282.dc.transpose.0"(#loc257))
+#loc2348 = loc("index_282.dc.matmul.2"(#loc258))
+#loc2349 = loc("index_282.dc.transpose.3"(#loc259))
+#loc2350 = loc("multiply_283"(#loc260))
+#loc2351 = loc("index_284.dc.transpose.0"(#loc261))
+#loc2352 = loc("index_284.dc.matmul.2"(#loc262))
+#loc2353 = loc("index_284.dc.transpose.3"(#loc263))
+#loc2354 = loc("concatenate_285"(#loc264))
+#loc2355 = loc("sine_286"(#loc265))
+#loc2356 = loc("unsqueeze_287"(#loc266))
+#loc2357 = loc("multiply_288"(#loc267))
+#loc2358 = loc("add_289"(#loc268))
+#loc2359 = loc("reshape_290.dc.squeeze.0"(#loc269))
+#loc2360 = loc("matmul_292"(#loc270))
+#loc2361 = loc("reshape_293"(#loc271))
+#loc2362 = loc("transpose_294"(#loc272))
+#loc2363 = loc("multiply_295"(#loc273))
+#loc2364 = loc("index_296.dc.transpose.0"(#loc274))
+#loc2365 = loc("index_296.dc.matmul.2"(#loc275))
+#loc2366 = loc("index_296.dc.transpose.3"(#loc276))
+#loc2367 = loc("multiply_297"(#loc277))
+#loc2368 = loc("index_298.dc.transpose.0"(#loc278))
+#loc2369 = loc("index_298.dc.matmul.2"(#loc279))
+#loc2370 = loc("index_298.dc.transpose.3"(#loc280))
+#loc2371 = loc("concatenate_299"(#loc281))
+#loc2372 = loc("multiply_300"(#loc282))
+#loc2373 = loc("add_301"(#loc283))
+#loc2374 = loc("reshape_302.dc.squeeze.0"(#loc284))
+#loc2375 = loc("transpose_303"(#loc285))
+#loc2376 = loc("matmul_304"(#loc286))
+#loc2377 = loc("reshape_305.dc.unsqueeze.0"(#loc287))
+#loc2378 = loc("multiply_306"(#loc288))
+#loc2379 = loc("add_307"(#loc289))
+#loc2380 = loc("softmax_308"(#loc290))
+#loc2381 = loc("reshape_310.dc.squeeze.0"(#loc291))
+#loc2382 = loc("matmul_312"(#loc292))
+#loc2383 = loc("reshape_313"(#loc293))
+#loc2384 = loc("transpose_314"(#loc294))
+#loc2385 = loc("transpose_315"(#loc295))
+#loc2386 = loc("reshape_316.dc.squeeze.0"(#loc296))
+#loc2387 = loc("transpose_317"(#loc297))
+#loc2388 = loc("matmul_318"(#loc298))
+#loc2389 = loc("reshape_319.dc.unsqueeze.0"(#loc299))
+#loc2390 = loc("transpose_320"(#loc300))
+#loc2391 = loc("reshape_321"(#loc301))
+#loc2392 = loc("matmul_323"(#loc302))
+#loc2393 = loc("reshape_324.dc.unsqueeze.0"(#loc303))
+#loc2394 = loc("add_325"(#loc304))
+#loc2395 = loc("multiply_326"(#loc305))
+#loc2396 = loc("reduce_avg_327"(#loc306))
+#loc2397 = loc("add_328"(#loc307))
+#loc2398 = loc("sqrt_329"(#loc308))
+#loc2399 = loc("reciprocal_330"(#loc309))
+#loc2400 = loc("multiply_331"(#loc310))
+#loc2401 = loc("multiply_332"(#loc311))
+#loc2402 = loc("reshape_333.dc.squeeze.0"(#loc312))
+#loc2403 = loc("matmul_335"(#loc313))
+#loc2404 = loc("reshape_336.dc.unsqueeze.0"(#loc314))
+#loc2405 = loc("sigmoid_337"(#loc315))
+#loc2406 = loc("multiply_338"(#loc316))
+#loc2407 = loc("matmul_340"(#loc317))
+#loc2408 = loc("reshape_341.dc.unsqueeze.0"(#loc318))
+#loc2409 = loc("multiply_342"(#loc319))
+#loc2410 = loc("matmul_344"(#loc320))
+#loc2411 = loc("add_345"(#loc321))
+#loc2412 = loc("multiply_346"(#loc322))
+#loc2413 = loc("reduce_avg_347"(#loc323))
+#loc2414 = loc("add_348"(#loc324))
+#loc2415 = loc("sqrt_349"(#loc325))
+#loc2416 = loc("reciprocal_350"(#loc326))
+#loc2417 = loc("multiply_351"(#loc327))
+#loc2418 = loc("multiply_352"(#loc328))
+#loc2419 = loc("reshape_353.dc.squeeze.0"(#loc329))
+#loc2420 = loc("matmul_355"(#loc330))
+#loc2421 = loc("reshape_356"(#loc331))
+#loc2422 = loc("transpose_357"(#loc332))
+#loc2423 = loc("concatenate_364"(#loc333))
+#loc2424 = loc("cosine_365"(#loc334))
+#loc2425 = loc("unsqueeze_366"(#loc335))
+#loc2426 = loc("multiply_367"(#loc336))
+#loc2427 = loc("index_368.dc.transpose.0"(#loc337))
+#loc2428 = loc("index_368.dc.matmul.2"(#loc338))
+#loc2429 = loc("index_368.dc.transpose.3"(#loc339))
+#loc2430 = loc("multiply_369"(#loc340))
+#loc2431 = loc("index_370.dc.transpose.0"(#loc341))
+#loc2432 = loc("index_370.dc.matmul.2"(#loc342))
+#loc2433 = loc("index_370.dc.transpose.3"(#loc343))
+#loc2434 = loc("concatenate_371"(#loc344))
+#loc2435 = loc("sine_372"(#loc345))
+#loc2436 = loc("unsqueeze_373"(#loc346))
+#loc2437 = loc("multiply_374"(#loc347))
+#loc2438 = loc("add_375"(#loc348))
+#loc2439 = loc("reshape_376.dc.squeeze.0"(#loc349))
+#loc2440 = loc("matmul_378"(#loc350))
+#loc2441 = loc("reshape_379"(#loc351))
+#loc2442 = loc("transpose_380"(#loc352))
+#loc2443 = loc("multiply_381"(#loc353))
+#loc2444 = loc("index_382.dc.transpose.0"(#loc354))
+#loc2445 = loc("index_382.dc.matmul.2"(#loc355))
+#loc2446 = loc("index_382.dc.transpose.3"(#loc356))
+#loc2447 = loc("multiply_383"(#loc357))
+#loc2448 = loc("index_384.dc.transpose.0"(#loc358))
+#loc2449 = loc("index_384.dc.matmul.2"(#loc359))
+#loc2450 = loc("index_384.dc.transpose.3"(#loc360))
+#loc2451 = loc("concatenate_385"(#loc361))
+#loc2452 = loc("multiply_386"(#loc362))
+#loc2453 = loc("add_387"(#loc363))
+#loc2454 = loc("reshape_388.dc.squeeze.0"(#loc364))
+#loc2455 = loc("transpose_389"(#loc365))
+#loc2456 = loc("matmul_390"(#loc366))
+#loc2457 = loc("reshape_391.dc.unsqueeze.0"(#loc367))
+#loc2458 = loc("multiply_392"(#loc368))
+#loc2459 = loc("add_393"(#loc369))
+#loc2460 = loc("softmax_394"(#loc370))
+#loc2461 = loc("reshape_396.dc.squeeze.0"(#loc371))
+#loc2462 = loc("matmul_398"(#loc372))
+#loc2463 = loc("reshape_399"(#loc373))
+#loc2464 = loc("transpose_400"(#loc374))
+#loc2465 = loc("transpose_401"(#loc375))
+#loc2466 = loc("reshape_402.dc.squeeze.0"(#loc376))
+#loc2467 = loc("transpose_403"(#loc377))
+#loc2468 = loc("matmul_404"(#loc378))
+#loc2469 = loc("reshape_405.dc.unsqueeze.0"(#loc379))
+#loc2470 = loc("transpose_406"(#loc380))
+#loc2471 = loc("reshape_407"(#loc381))
+#loc2472 = loc("matmul_409"(#loc382))
+#loc2473 = loc("reshape_410.dc.unsqueeze.0"(#loc383))
+#loc2474 = loc("add_411"(#loc384))
+#loc2475 = loc("multiply_412"(#loc385))
+#loc2476 = loc("reduce_avg_413"(#loc386))
+#loc2477 = loc("add_414"(#loc387))
+#loc2478 = loc("sqrt_415"(#loc388))
+#loc2479 = loc("reciprocal_416"(#loc389))
+#loc2480 = loc("multiply_417"(#loc390))
+#loc2481 = loc("multiply_418"(#loc391))
+#loc2482 = loc("reshape_419.dc.squeeze.0"(#loc392))
+#loc2483 = loc("matmul_421"(#loc393))
+#loc2484 = loc("reshape_422.dc.unsqueeze.0"(#loc394))
+#loc2485 = loc("sigmoid_423"(#loc395))
+#loc2486 = loc("multiply_424"(#loc396))
+#loc2487 = loc("matmul_426"(#loc397))
+#loc2488 = loc("reshape_427.dc.unsqueeze.0"(#loc398))
+#loc2489 = loc("multiply_428"(#loc399))
+#loc2490 = loc("matmul_430"(#loc400))
+#loc2491 = loc("add_431"(#loc401))
+#loc2492 = loc("multiply_432"(#loc402))
+#loc2493 = loc("reduce_avg_433"(#loc403))
+#loc2494 = loc("add_434"(#loc404))
+#loc2495 = loc("sqrt_435"(#loc405))
+#loc2496 = loc("reciprocal_436"(#loc406))
+#loc2497 = loc("multiply_437"(#loc407))
+#loc2498 = loc("multiply_438"(#loc408))
+#loc2499 = loc("reshape_439.dc.squeeze.0"(#loc409))
+#loc2500 = loc("matmul_441"(#loc410))
+#loc2501 = loc("reshape_442"(#loc411))
+#loc2502 = loc("transpose_443"(#loc412))
+#loc2503 = loc("concatenate_450"(#loc413))
+#loc2504 = loc("cosine_451"(#loc414))
+#loc2505 = loc("unsqueeze_452"(#loc415))
+#loc2506 = loc("multiply_453"(#loc416))
+#loc2507 = loc("index_454.dc.transpose.0"(#loc417))
+#loc2508 = loc("index_454.dc.matmul.2"(#loc418))
+#loc2509 = loc("index_454.dc.transpose.3"(#loc419))
+#loc2510 = loc("multiply_455"(#loc420))
+#loc2511 = loc("index_456.dc.transpose.0"(#loc421))
+#loc2512 = loc("index_456.dc.matmul.2"(#loc422))
+#loc2513 = loc("index_456.dc.transpose.3"(#loc423))
+#loc2514 = loc("concatenate_457"(#loc424))
+#loc2515 = loc("sine_458"(#loc425))
+#loc2516 = loc("unsqueeze_459"(#loc426))
+#loc2517 = loc("multiply_460"(#loc427))
+#loc2518 = loc("add_461"(#loc428))
+#loc2519 = loc("reshape_462.dc.squeeze.0"(#loc429))
+#loc2520 = loc("matmul_464"(#loc430))
+#loc2521 = loc("reshape_465"(#loc431))
+#loc2522 = loc("transpose_466"(#loc432))
+#loc2523 = loc("multiply_467"(#loc433))
+#loc2524 = loc("index_468.dc.transpose.0"(#loc434))
+#loc2525 = loc("index_468.dc.matmul.2"(#loc435))
+#loc2526 = loc("index_468.dc.transpose.3"(#loc436))
+#loc2527 = loc("multiply_469"(#loc437))
+#loc2528 = loc("index_470.dc.transpose.0"(#loc438))
+#loc2529 = loc("index_470.dc.matmul.2"(#loc439))
+#loc2530 = loc("index_470.dc.transpose.3"(#loc440))
+#loc2531 = loc("concatenate_471"(#loc441))
+#loc2532 = loc("multiply_472"(#loc442))
+#loc2533 = loc("add_473"(#loc443))
+#loc2534 = loc("reshape_474.dc.squeeze.0"(#loc444))
+#loc2535 = loc("transpose_475"(#loc445))
+#loc2536 = loc("matmul_476"(#loc446))
+#loc2537 = loc("reshape_477.dc.unsqueeze.0"(#loc447))
+#loc2538 = loc("multiply_478"(#loc448))
+#loc2539 = loc("add_479"(#loc449))
+#loc2540 = loc("softmax_480"(#loc450))
+#loc2541 = loc("reshape_482.dc.squeeze.0"(#loc451))
+#loc2542 = loc("matmul_484"(#loc452))
+#loc2543 = loc("reshape_485"(#loc453))
+#loc2544 = loc("transpose_486"(#loc454))
+#loc2545 = loc("transpose_487"(#loc455))
+#loc2546 = loc("reshape_488.dc.squeeze.0"(#loc456))
+#loc2547 = loc("transpose_489"(#loc457))
+#loc2548 = loc("matmul_490"(#loc458))
+#loc2549 = loc("reshape_491.dc.unsqueeze.0"(#loc459))
+#loc2550 = loc("transpose_492"(#loc460))
+#loc2551 = loc("reshape_493"(#loc461))
+#loc2552 = loc("matmul_495"(#loc462))
+#loc2553 = loc("reshape_496.dc.unsqueeze.0"(#loc463))
+#loc2554 = loc("add_497"(#loc464))
+#loc2555 = loc("multiply_498"(#loc465))
+#loc2556 = loc("reduce_avg_499"(#loc466))
+#loc2557 = loc("add_500"(#loc467))
+#loc2558 = loc("sqrt_501"(#loc468))
+#loc2559 = loc("reciprocal_502"(#loc469))
+#loc2560 = loc("multiply_503"(#loc470))
+#loc2561 = loc("multiply_504"(#loc471))
+#loc2562 = loc("reshape_505.dc.squeeze.0"(#loc472))
+#loc2563 = loc("matmul_507"(#loc473))
+#loc2564 = loc("reshape_508.dc.unsqueeze.0"(#loc474))
+#loc2565 = loc("sigmoid_509"(#loc475))
+#loc2566 = loc("multiply_510"(#loc476))
+#loc2567 = loc("matmul_512"(#loc477))
+#loc2568 = loc("reshape_513.dc.unsqueeze.0"(#loc478))
+#loc2569 = loc("multiply_514"(#loc479))
+#loc2570 = loc("matmul_516"(#loc480))
+#loc2571 = loc("add_517"(#loc481))
+#loc2572 = loc("multiply_518"(#loc482))
+#loc2573 = loc("reduce_avg_519"(#loc483))
+#loc2574 = loc("add_520"(#loc484))
+#loc2575 = loc("sqrt_521"(#loc485))
+#loc2576 = loc("reciprocal_522"(#loc486))
+#loc2577 = loc("multiply_523"(#loc487))
+#loc2578 = loc("multiply_524"(#loc488))
+#loc2579 = loc("reshape_525.dc.squeeze.0"(#loc489))
+#loc2580 = loc("matmul_527"(#loc490))
+#loc2581 = loc("reshape_528"(#loc491))
+#loc2582 = loc("transpose_529"(#loc492))
+#loc2583 = loc("concatenate_536"(#loc493))
+#loc2584 = loc("cosine_537"(#loc494))
+#loc2585 = loc("unsqueeze_538"(#loc495))
+#loc2586 = loc("multiply_539"(#loc496))
+#loc2587 = loc("index_540.dc.transpose.0"(#loc497))
+#loc2588 = loc("index_540.dc.matmul.2"(#loc498))
+#loc2589 = loc("index_540.dc.transpose.3"(#loc499))
+#loc2590 = loc("multiply_541"(#loc500))
+#loc2591 = loc("index_542.dc.transpose.0"(#loc501))
+#loc2592 = loc("index_542.dc.matmul.2"(#loc502))
+#loc2593 = loc("index_542.dc.transpose.3"(#loc503))
+#loc2594 = loc("concatenate_543"(#loc504))
+#loc2595 = loc("sine_544"(#loc505))
+#loc2596 = loc("unsqueeze_545"(#loc506))
+#loc2597 = loc("multiply_546"(#loc507))
+#loc2598 = loc("add_547"(#loc508))
+#loc2599 = loc("reshape_548.dc.squeeze.0"(#loc509))
+#loc2600 = loc("matmul_550"(#loc510))
+#loc2601 = loc("reshape_551"(#loc511))
+#loc2602 = loc("transpose_552"(#loc512))
+#loc2603 = loc("multiply_553"(#loc513))
+#loc2604 = loc("index_554.dc.transpose.0"(#loc514))
+#loc2605 = loc("index_554.dc.matmul.2"(#loc515))
+#loc2606 = loc("index_554.dc.transpose.3"(#loc516))
+#loc2607 = loc("multiply_555"(#loc517))
+#loc2608 = loc("index_556.dc.transpose.0"(#loc518))
+#loc2609 = loc("index_556.dc.matmul.2"(#loc519))
+#loc2610 = loc("index_556.dc.transpose.3"(#loc520))
+#loc2611 = loc("concatenate_557"(#loc521))
+#loc2612 = loc("multiply_558"(#loc522))
+#loc2613 = loc("add_559"(#loc523))
+#loc2614 = loc("reshape_560.dc.squeeze.0"(#loc524))
+#loc2615 = loc("transpose_561"(#loc525))
+#loc2616 = loc("matmul_562"(#loc526))
+#loc2617 = loc("reshape_563.dc.unsqueeze.0"(#loc527))
+#loc2618 = loc("multiply_564"(#loc528))
+#loc2619 = loc("add_565"(#loc529))
+#loc2620 = loc("softmax_566"(#loc530))
+#loc2621 = loc("reshape_568.dc.squeeze.0"(#loc531))
+#loc2622 = loc("matmul_570"(#loc532))
+#loc2623 = loc("reshape_571"(#loc533))
+#loc2624 = loc("transpose_572"(#loc534))
+#loc2625 = loc("transpose_573"(#loc535))
+#loc2626 = loc("reshape_574.dc.squeeze.0"(#loc536))
+#loc2627 = loc("transpose_575"(#loc537))
+#loc2628 = loc("matmul_576"(#loc538))
+#loc2629 = loc("reshape_577.dc.unsqueeze.0"(#loc539))
+#loc2630 = loc("transpose_578"(#loc540))
+#loc2631 = loc("reshape_579"(#loc541))
+#loc2632 = loc("matmul_581"(#loc542))
+#loc2633 = loc("reshape_582.dc.unsqueeze.0"(#loc543))
+#loc2634 = loc("add_583"(#loc544))
+#loc2635 = loc("multiply_584"(#loc545))
+#loc2636 = loc("reduce_avg_585"(#loc546))
+#loc2637 = loc("add_586"(#loc547))
+#loc2638 = loc("sqrt_587"(#loc548))
+#loc2639 = loc("reciprocal_588"(#loc549))
+#loc2640 = loc("multiply_589"(#loc550))
+#loc2641 = loc("multiply_590"(#loc551))
+#loc2642 = loc("reshape_591.dc.squeeze.0"(#loc552))
+#loc2643 = loc("matmul_593"(#loc553))
+#loc2644 = loc("reshape_594.dc.unsqueeze.0"(#loc554))
+#loc2645 = loc("sigmoid_595"(#loc555))
+#loc2646 = loc("multiply_596"(#loc556))
+#loc2647 = loc("matmul_598"(#loc557))
+#loc2648 = loc("reshape_599.dc.unsqueeze.0"(#loc558))
+#loc2649 = loc("multiply_600"(#loc559))
+#loc2650 = loc("matmul_602"(#loc560))
+#loc2651 = loc("add_603"(#loc561))
+#loc2652 = loc("multiply_604"(#loc562))
+#loc2653 = loc("reduce_avg_605"(#loc563))
+#loc2654 = loc("add_606"(#loc564))
+#loc2655 = loc("sqrt_607"(#loc565))
+#loc2656 = loc("reciprocal_608"(#loc566))
+#loc2657 = loc("multiply_609"(#loc567))
+#loc2658 = loc("multiply_610"(#loc568))
+#loc2659 = loc("reshape_611.dc.squeeze.0"(#loc569))
+#loc2660 = loc("matmul_613"(#loc570))
+#loc2661 = loc("reshape_614"(#loc571))
+#loc2662 = loc("transpose_615"(#loc572))
+#loc2663 = loc("concatenate_622"(#loc573))
+#loc2664 = loc("cosine_623"(#loc574))
+#loc2665 = loc("unsqueeze_624"(#loc575))
+#loc2666 = loc("multiply_625"(#loc576))
+#loc2667 = loc("index_626.dc.transpose.0"(#loc577))
+#loc2668 = loc("index_626.dc.matmul.2"(#loc578))
+#loc2669 = loc("index_626.dc.transpose.3"(#loc579))
+#loc2670 = loc("multiply_627"(#loc580))
+#loc2671 = loc("index_628.dc.transpose.0"(#loc581))
+#loc2672 = loc("index_628.dc.matmul.2"(#loc582))
+#loc2673 = loc("index_628.dc.transpose.3"(#loc583))
+#loc2674 = loc("concatenate_629"(#loc584))
+#loc2675 = loc("sine_630"(#loc585))
+#loc2676 = loc("unsqueeze_631"(#loc586))
+#loc2677 = loc("multiply_632"(#loc587))
+#loc2678 = loc("add_633"(#loc588))
+#loc2679 = loc("reshape_634.dc.squeeze.0"(#loc589))
+#loc2680 = loc("matmul_636"(#loc590))
+#loc2681 = loc("reshape_637"(#loc591))
+#loc2682 = loc("transpose_638"(#loc592))
+#loc2683 = loc("multiply_639"(#loc593))
+#loc2684 = loc("index_640.dc.transpose.0"(#loc594))
+#loc2685 = loc("index_640.dc.matmul.2"(#loc595))
+#loc2686 = loc("index_640.dc.transpose.3"(#loc596))
+#loc2687 = loc("multiply_641"(#loc597))
+#loc2688 = loc("index_642.dc.transpose.0"(#loc598))
+#loc2689 = loc("index_642.dc.matmul.2"(#loc599))
+#loc2690 = loc("index_642.dc.transpose.3"(#loc600))
+#loc2691 = loc("concatenate_643"(#loc601))
+#loc2692 = loc("multiply_644"(#loc602))
+#loc2693 = loc("add_645"(#loc603))
+#loc2694 = loc("reshape_646.dc.squeeze.0"(#loc604))
+#loc2695 = loc("transpose_647"(#loc605))
+#loc2696 = loc("matmul_648"(#loc606))
+#loc2697 = loc("reshape_649.dc.unsqueeze.0"(#loc607))
+#loc2698 = loc("multiply_650"(#loc608))
+#loc2699 = loc("add_651"(#loc609))
+#loc2700 = loc("softmax_652"(#loc610))
+#loc2701 = loc("reshape_654.dc.squeeze.0"(#loc611))
+#loc2702 = loc("matmul_656"(#loc612))
+#loc2703 = loc("reshape_657"(#loc613))
+#loc2704 = loc("transpose_658"(#loc614))
+#loc2705 = loc("transpose_659"(#loc615))
+#loc2706 = loc("reshape_660.dc.squeeze.0"(#loc616))
+#loc2707 = loc("transpose_661"(#loc617))
+#loc2708 = loc("matmul_662"(#loc618))
+#loc2709 = loc("reshape_663.dc.unsqueeze.0"(#loc619))
+#loc2710 = loc("transpose_664"(#loc620))
+#loc2711 = loc("reshape_665"(#loc621))
+#loc2712 = loc("matmul_667"(#loc622))
+#loc2713 = loc("reshape_668.dc.unsqueeze.0"(#loc623))
+#loc2714 = loc("add_669"(#loc624))
+#loc2715 = loc("multiply_670"(#loc625))
+#loc2716 = loc("reduce_avg_671"(#loc626))
+#loc2717 = loc("add_672"(#loc627))
+#loc2718 = loc("sqrt_673"(#loc628))
+#loc2719 = loc("reciprocal_674"(#loc629))
+#loc2720 = loc("multiply_675"(#loc630))
+#loc2721 = loc("multiply_676"(#loc631))
+#loc2722 = loc("reshape_677.dc.squeeze.0"(#loc632))
+#loc2723 = loc("matmul_679"(#loc633))
+#loc2724 = loc("reshape_680.dc.unsqueeze.0"(#loc634))
+#loc2725 = loc("sigmoid_681"(#loc635))
+#loc2726 = loc("multiply_682"(#loc636))
+#loc2727 = loc("matmul_684"(#loc637))
+#loc2728 = loc("reshape_685.dc.unsqueeze.0"(#loc638))
+#loc2729 = loc("multiply_686"(#loc639))
+#loc2730 = loc("matmul_688"(#loc640))
+#loc2731 = loc("add_689"(#loc641))
+#loc2732 = loc("multiply_690"(#loc642))
+#loc2733 = loc("reduce_avg_691"(#loc643))
+#loc2734 = loc("add_692"(#loc644))
+#loc2735 = loc("sqrt_693"(#loc645))
+#loc2736 = loc("reciprocal_694"(#loc646))
+#loc2737 = loc("multiply_695"(#loc647))
+#loc2738 = loc("multiply_696"(#loc648))
+#loc2739 = loc("reshape_697.dc.squeeze.0"(#loc649))
+#loc2740 = loc("matmul_699"(#loc650))
+#loc2741 = loc("reshape_700"(#loc651))
+#loc2742 = loc("transpose_701"(#loc652))
+#loc2743 = loc("concatenate_708"(#loc653))
+#loc2744 = loc("cosine_709"(#loc654))
+#loc2745 = loc("unsqueeze_710"(#loc655))
+#loc2746 = loc("multiply_711"(#loc656))
+#loc2747 = loc("index_712.dc.transpose.0"(#loc657))
+#loc2748 = loc("index_712.dc.matmul.2"(#loc658))
+#loc2749 = loc("index_712.dc.transpose.3"(#loc659))
+#loc2750 = loc("multiply_713"(#loc660))
+#loc2751 = loc("index_714.dc.transpose.0"(#loc661))
+#loc2752 = loc("index_714.dc.matmul.2"(#loc662))
+#loc2753 = loc("index_714.dc.transpose.3"(#loc663))
+#loc2754 = loc("concatenate_715"(#loc664))
+#loc2755 = loc("sine_716"(#loc665))
+#loc2756 = loc("unsqueeze_717"(#loc666))
+#loc2757 = loc("multiply_718"(#loc667))
+#loc2758 = loc("add_719"(#loc668))
+#loc2759 = loc("reshape_720.dc.squeeze.0"(#loc669))
+#loc2760 = loc("matmul_722"(#loc670))
+#loc2761 = loc("reshape_723"(#loc671))
+#loc2762 = loc("transpose_724"(#loc672))
+#loc2763 = loc("multiply_725"(#loc673))
+#loc2764 = loc("index_726.dc.transpose.0"(#loc674))
+#loc2765 = loc("index_726.dc.matmul.2"(#loc675))
+#loc2766 = loc("index_726.dc.transpose.3"(#loc676))
+#loc2767 = loc("multiply_727"(#loc677))
+#loc2768 = loc("index_728.dc.transpose.0"(#loc678))
+#loc2769 = loc("index_728.dc.matmul.2"(#loc679))
+#loc2770 = loc("index_728.dc.transpose.3"(#loc680))
+#loc2771 = loc("concatenate_729"(#loc681))
+#loc2772 = loc("multiply_730"(#loc682))
+#loc2773 = loc("add_731"(#loc683))
+#loc2774 = loc("reshape_732.dc.squeeze.0"(#loc684))
+#loc2775 = loc("transpose_733"(#loc685))
+#loc2776 = loc("matmul_734"(#loc686))
+#loc2777 = loc("reshape_735.dc.unsqueeze.0"(#loc687))
+#loc2778 = loc("multiply_736"(#loc688))
+#loc2779 = loc("add_737"(#loc689))
+#loc2780 = loc("softmax_738"(#loc690))
+#loc2781 = loc("reshape_740.dc.squeeze.0"(#loc691))
+#loc2782 = loc("matmul_742"(#loc692))
+#loc2783 = loc("reshape_743"(#loc693))
+#loc2784 = loc("transpose_744"(#loc694))
+#loc2785 = loc("transpose_745"(#loc695))
+#loc2786 = loc("reshape_746.dc.squeeze.0"(#loc696))
+#loc2787 = loc("transpose_747"(#loc697))
+#loc2788 = loc("matmul_748"(#loc698))
+#loc2789 = loc("reshape_749.dc.unsqueeze.0"(#loc699))
+#loc2790 = loc("transpose_750"(#loc700))
+#loc2791 = loc("reshape_751"(#loc701))
+#loc2792 = loc("matmul_753"(#loc702))
+#loc2793 = loc("reshape_754.dc.unsqueeze.0"(#loc703))
+#loc2794 = loc("add_755"(#loc704))
+#loc2795 = loc("multiply_756"(#loc705))
+#loc2796 = loc("reduce_avg_757"(#loc706))
+#loc2797 = loc("add_758"(#loc707))
+#loc2798 = loc("sqrt_759"(#loc708))
+#loc2799 = loc("reciprocal_760"(#loc709))
+#loc2800 = loc("multiply_761"(#loc710))
+#loc2801 = loc("multiply_762"(#loc711))
+#loc2802 = loc("reshape_763.dc.squeeze.0"(#loc712))
+#loc2803 = loc("matmul_765"(#loc713))
+#loc2804 = loc("reshape_766.dc.unsqueeze.0"(#loc714))
+#loc2805 = loc("sigmoid_767"(#loc715))
+#loc2806 = loc("multiply_768"(#loc716))
+#loc2807 = loc("matmul_770"(#loc717))
+#loc2808 = loc("reshape_771.dc.unsqueeze.0"(#loc718))
+#loc2809 = loc("multiply_772"(#loc719))
+#loc2810 = loc("matmul_774"(#loc720))
+#loc2811 = loc("add_775"(#loc721))
+#loc2812 = loc("multiply_776"(#loc722))
+#loc2813 = loc("reduce_avg_777"(#loc723))
+#loc2814 = loc("add_778"(#loc724))
+#loc2815 = loc("sqrt_779"(#loc725))
+#loc2816 = loc("reciprocal_780"(#loc726))
+#loc2817 = loc("multiply_781"(#loc727))
+#loc2818 = loc("multiply_782"(#loc728))
+#loc2819 = loc("reshape_783.dc.squeeze.0"(#loc729))
+#loc2820 = loc("matmul_785"(#loc730))
+#loc2821 = loc("reshape_786"(#loc731))
+#loc2822 = loc("transpose_787"(#loc732))
+#loc2823 = loc("concatenate_794"(#loc733))
+#loc2824 = loc("cosine_795"(#loc734))
+#loc2825 = loc("unsqueeze_796"(#loc735))
+#loc2826 = loc("multiply_797"(#loc736))
+#loc2827 = loc("index_798.dc.transpose.0"(#loc737))
+#loc2828 = loc("index_798.dc.matmul.2"(#loc738))
+#loc2829 = loc("index_798.dc.transpose.3"(#loc739))
+#loc2830 = loc("multiply_799"(#loc740))
+#loc2831 = loc("index_800.dc.transpose.0"(#loc741))
+#loc2832 = loc("index_800.dc.matmul.2"(#loc742))
+#loc2833 = loc("index_800.dc.transpose.3"(#loc743))
+#loc2834 = loc("concatenate_801"(#loc744))
+#loc2835 = loc("sine_802"(#loc745))
+#loc2836 = loc("unsqueeze_803"(#loc746))
+#loc2837 = loc("multiply_804"(#loc747))
+#loc2838 = loc("add_805"(#loc748))
+#loc2839 = loc("reshape_806.dc.squeeze.0"(#loc749))
+#loc2840 = loc("matmul_808"(#loc750))
+#loc2841 = loc("reshape_809"(#loc751))
+#loc2842 = loc("transpose_810"(#loc752))
+#loc2843 = loc("multiply_811"(#loc753))
+#loc2844 = loc("index_812.dc.transpose.0"(#loc754))
+#loc2845 = loc("index_812.dc.matmul.2"(#loc755))
+#loc2846 = loc("index_812.dc.transpose.3"(#loc756))
+#loc2847 = loc("multiply_813"(#loc757))
+#loc2848 = loc("index_814.dc.transpose.0"(#loc758))
+#loc2849 = loc("index_814.dc.matmul.2"(#loc759))
+#loc2850 = loc("index_814.dc.transpose.3"(#loc760))
+#loc2851 = loc("concatenate_815"(#loc761))
+#loc2852 = loc("multiply_816"(#loc762))
+#loc2853 = loc("add_817"(#loc763))
+#loc2854 = loc("reshape_818.dc.squeeze.0"(#loc764))
+#loc2855 = loc("transpose_819"(#loc765))
+#loc2856 = loc("matmul_820"(#loc766))
+#loc2857 = loc("reshape_821.dc.unsqueeze.0"(#loc767))
+#loc2858 = loc("multiply_822"(#loc768))
+#loc2859 = loc("add_823"(#loc769))
+#loc2860 = loc("softmax_824"(#loc770))
+#loc2861 = loc("reshape_826.dc.squeeze.0"(#loc771))
+#loc2862 = loc("matmul_828"(#loc772))
+#loc2863 = loc("reshape_829"(#loc773))
+#loc2864 = loc("transpose_830"(#loc774))
+#loc2865 = loc("transpose_831"(#loc775))
+#loc2866 = loc("reshape_832.dc.squeeze.0"(#loc776))
+#loc2867 = loc("transpose_833"(#loc777))
+#loc2868 = loc("matmul_834"(#loc778))
+#loc2869 = loc("reshape_835.dc.unsqueeze.0"(#loc779))
+#loc2870 = loc("transpose_836"(#loc780))
+#loc2871 = loc("reshape_837"(#loc781))
+#loc2872 = loc("matmul_839"(#loc782))
+#loc2873 = loc("reshape_840.dc.unsqueeze.0"(#loc783))
+#loc2874 = loc("add_841"(#loc784))
+#loc2875 = loc("multiply_842"(#loc785))
+#loc2876 = loc("reduce_avg_843"(#loc786))
+#loc2877 = loc("add_844"(#loc787))
+#loc2878 = loc("sqrt_845"(#loc788))
+#loc2879 = loc("reciprocal_846"(#loc789))
+#loc2880 = loc("multiply_847"(#loc790))
+#loc2881 = loc("multiply_848"(#loc791))
+#loc2882 = loc("reshape_849.dc.squeeze.0"(#loc792))
+#loc2883 = loc("matmul_851"(#loc793))
+#loc2884 = loc("reshape_852.dc.unsqueeze.0"(#loc794))
+#loc2885 = loc("sigmoid_853"(#loc795))
+#loc2886 = loc("multiply_854"(#loc796))
+#loc2887 = loc("matmul_856"(#loc797))
+#loc2888 = loc("reshape_857.dc.unsqueeze.0"(#loc798))
+#loc2889 = loc("multiply_858"(#loc799))
+#loc2890 = loc("matmul_860"(#loc800))
+#loc2891 = loc("add_861"(#loc801))
+#loc2892 = loc("multiply_862"(#loc802))
+#loc2893 = loc("reduce_avg_863"(#loc803))
+#loc2894 = loc("add_864"(#loc804))
+#loc2895 = loc("sqrt_865"(#loc805))
+#loc2896 = loc("reciprocal_866"(#loc806))
+#loc2897 = loc("multiply_867"(#loc807))
+#loc2898 = loc("multiply_868"(#loc808))
+#loc2899 = loc("reshape_869.dc.squeeze.0"(#loc809))
+#loc2900 = loc("matmul_871"(#loc810))
+#loc2901 = loc("reshape_872"(#loc811))
+#loc2902 = loc("transpose_873"(#loc812))
+#loc2903 = loc("concatenate_880"(#loc813))
+#loc2904 = loc("cosine_881"(#loc814))
+#loc2905 = loc("unsqueeze_882"(#loc815))
+#loc2906 = loc("multiply_883"(#loc816))
+#loc2907 = loc("index_884.dc.transpose.0"(#loc817))
+#loc2908 = loc("index_884.dc.matmul.2"(#loc818))
+#loc2909 = loc("index_884.dc.transpose.3"(#loc819))
+#loc2910 = loc("multiply_885"(#loc820))
+#loc2911 = loc("index_886.dc.transpose.0"(#loc821))
+#loc2912 = loc("index_886.dc.matmul.2"(#loc822))
+#loc2913 = loc("index_886.dc.transpose.3"(#loc823))
+#loc2914 = loc("concatenate_887"(#loc824))
+#loc2915 = loc("sine_888"(#loc825))
+#loc2916 = loc("unsqueeze_889"(#loc826))
+#loc2917 = loc("multiply_890"(#loc827))
+#loc2918 = loc("add_891"(#loc828))
+#loc2919 = loc("reshape_892.dc.squeeze.0"(#loc829))
+#loc2920 = loc("matmul_894"(#loc830))
+#loc2921 = loc("reshape_895"(#loc831))
+#loc2922 = loc("transpose_896"(#loc832))
+#loc2923 = loc("multiply_897"(#loc833))
+#loc2924 = loc("index_898.dc.transpose.0"(#loc834))
+#loc2925 = loc("index_898.dc.matmul.2"(#loc835))
+#loc2926 = loc("index_898.dc.transpose.3"(#loc836))
+#loc2927 = loc("multiply_899"(#loc837))
+#loc2928 = loc("index_900.dc.transpose.0"(#loc838))
+#loc2929 = loc("index_900.dc.matmul.2"(#loc839))
+#loc2930 = loc("index_900.dc.transpose.3"(#loc840))
+#loc2931 = loc("concatenate_901"(#loc841))
+#loc2932 = loc("multiply_902"(#loc842))
+#loc2933 = loc("add_903"(#loc843))
+#loc2934 = loc("reshape_904.dc.squeeze.0"(#loc844))
+#loc2935 = loc("transpose_905"(#loc845))
+#loc2936 = loc("matmul_906"(#loc846))
+#loc2937 = loc("reshape_907.dc.unsqueeze.0"(#loc847))
+#loc2938 = loc("multiply_908"(#loc848))
+#loc2939 = loc("add_909"(#loc849))
+#loc2940 = loc("softmax_910"(#loc850))
+#loc2941 = loc("reshape_912.dc.squeeze.0"(#loc851))
+#loc2942 = loc("matmul_914"(#loc852))
+#loc2943 = loc("reshape_915"(#loc853))
+#loc2944 = loc("transpose_916"(#loc854))
+#loc2945 = loc("transpose_917"(#loc855))
+#loc2946 = loc("reshape_918.dc.squeeze.0"(#loc856))
+#loc2947 = loc("transpose_919"(#loc857))
+#loc2948 = loc("matmul_920"(#loc858))
+#loc2949 = loc("reshape_921.dc.unsqueeze.0"(#loc859))
+#loc2950 = loc("transpose_922"(#loc860))
+#loc2951 = loc("reshape_923"(#loc861))
+#loc2952 = loc("matmul_925"(#loc862))
+#loc2953 = loc("reshape_926.dc.unsqueeze.0"(#loc863))
+#loc2954 = loc("add_927"(#loc864))
+#loc2955 = loc("multiply_928"(#loc865))
+#loc2956 = loc("reduce_avg_929"(#loc866))
+#loc2957 = loc("add_930"(#loc867))
+#loc2958 = loc("sqrt_931"(#loc868))
+#loc2959 = loc("reciprocal_932"(#loc869))
+#loc2960 = loc("multiply_933"(#loc870))
+#loc2961 = loc("multiply_934"(#loc871))
+#loc2962 = loc("reshape_935.dc.squeeze.0"(#loc872))
+#loc2963 = loc("matmul_937"(#loc873))
+#loc2964 = loc("reshape_938.dc.unsqueeze.0"(#loc874))
+#loc2965 = loc("sigmoid_939"(#loc875))
+#loc2966 = loc("multiply_940"(#loc876))
+#loc2967 = loc("matmul_942"(#loc877))
+#loc2968 = loc("reshape_943.dc.unsqueeze.0"(#loc878))
+#loc2969 = loc("multiply_944"(#loc879))
+#loc2970 = loc("matmul_946"(#loc880))
+#loc2971 = loc("add_947"(#loc881))
+#loc2972 = loc("multiply_948"(#loc882))
+#loc2973 = loc("reduce_avg_949"(#loc883))
+#loc2974 = loc("add_950"(#loc884))
+#loc2975 = loc("sqrt_951"(#loc885))
+#loc2976 = loc("reciprocal_952"(#loc886))
+#loc2977 = loc("multiply_953"(#loc887))
+#loc2978 = loc("multiply_954"(#loc888))
+#loc2979 = loc("reshape_955.dc.squeeze.0"(#loc889))
+#loc2980 = loc("matmul_957"(#loc890))
+#loc2981 = loc("reshape_958"(#loc891))
+#loc2982 = loc("transpose_959"(#loc892))
+#loc2983 = loc("concatenate_966"(#loc893))
+#loc2984 = loc("cosine_967"(#loc894))
+#loc2985 = loc("unsqueeze_968"(#loc895))
+#loc2986 = loc("multiply_969"(#loc896))
+#loc2987 = loc("index_970.dc.transpose.0"(#loc897))
+#loc2988 = loc("index_970.dc.matmul.2"(#loc898))
+#loc2989 = loc("index_970.dc.transpose.3"(#loc899))
+#loc2990 = loc("multiply_971"(#loc900))
+#loc2991 = loc("index_972.dc.transpose.0"(#loc901))
+#loc2992 = loc("index_972.dc.matmul.2"(#loc902))
+#loc2993 = loc("index_972.dc.transpose.3"(#loc903))
+#loc2994 = loc("concatenate_973"(#loc904))
+#loc2995 = loc("sine_974"(#loc905))
+#loc2996 = loc("unsqueeze_975"(#loc906))
+#loc2997 = loc("multiply_976"(#loc907))
+#loc2998 = loc("add_977"(#loc908))
+#loc2999 = loc("reshape_978.dc.squeeze.0"(#loc909))
+#loc3000 = loc("matmul_980"(#loc910))
+#loc3001 = loc("reshape_981"(#loc911))
+#loc3002 = loc("transpose_982"(#loc912))
+#loc3003 = loc("multiply_983"(#loc913))
+#loc3004 = loc("index_984.dc.transpose.0"(#loc914))
+#loc3005 = loc("index_984.dc.matmul.2"(#loc915))
+#loc3006 = loc("index_984.dc.transpose.3"(#loc916))
+#loc3007 = loc("multiply_985"(#loc917))
+#loc3008 = loc("index_986.dc.transpose.0"(#loc918))
+#loc3009 = loc("index_986.dc.matmul.2"(#loc919))
+#loc3010 = loc("index_986.dc.transpose.3"(#loc920))
+#loc3011 = loc("concatenate_987"(#loc921))
+#loc3012 = loc("multiply_988"(#loc922))
+#loc3013 = loc("add_989"(#loc923))
+#loc3014 = loc("reshape_990.dc.squeeze.0"(#loc924))
+#loc3015 = loc("transpose_991"(#loc925))
+#loc3016 = loc("matmul_992"(#loc926))
+#loc3017 = loc("reshape_993.dc.unsqueeze.0"(#loc927))
+#loc3018 = loc("multiply_994"(#loc928))
+#loc3019 = loc("add_995"(#loc929))
+#loc3020 = loc("softmax_996"(#loc930))
+#loc3021 = loc("reshape_998.dc.squeeze.0"(#loc931))
+#loc3022 = loc("matmul_1000"(#loc932))
+#loc3023 = loc("reshape_1001"(#loc933))
+#loc3024 = loc("transpose_1002"(#loc934))
+#loc3025 = loc("transpose_1003"(#loc935))
+#loc3026 = loc("reshape_1004.dc.squeeze.0"(#loc936))
+#loc3027 = loc("transpose_1005"(#loc937))
+#loc3028 = loc("matmul_1006"(#loc938))
+#loc3029 = loc("reshape_1007.dc.unsqueeze.0"(#loc939))
+#loc3030 = loc("transpose_1008"(#loc940))
+#loc3031 = loc("reshape_1009"(#loc941))
+#loc3032 = loc("matmul_1011"(#loc942))
+#loc3033 = loc("reshape_1012.dc.unsqueeze.0"(#loc943))
+#loc3034 = loc("add_1013"(#loc944))
+#loc3035 = loc("multiply_1014"(#loc945))
+#loc3036 = loc("reduce_avg_1015"(#loc946))
+#loc3037 = loc("add_1016"(#loc947))
+#loc3038 = loc("sqrt_1017"(#loc948))
+#loc3039 = loc("reciprocal_1018"(#loc949))
+#loc3040 = loc("multiply_1019"(#loc950))
+#loc3041 = loc("multiply_1020"(#loc951))
+#loc3042 = loc("reshape_1021.dc.squeeze.0"(#loc952))
+#loc3043 = loc("matmul_1023"(#loc953))
+#loc3044 = loc("reshape_1024.dc.unsqueeze.0"(#loc954))
+#loc3045 = loc("sigmoid_1025"(#loc955))
+#loc3046 = loc("multiply_1026"(#loc956))
+#loc3047 = loc("matmul_1028"(#loc957))
+#loc3048 = loc("reshape_1029.dc.unsqueeze.0"(#loc958))
+#loc3049 = loc("multiply_1030"(#loc959))
+#loc3050 = loc("matmul_1032"(#loc960))
+#loc3051 = loc("add_1033"(#loc961))
+#loc3052 = loc("multiply_1034"(#loc962))
+#loc3053 = loc("reduce_avg_1035"(#loc963))
+#loc3054 = loc("add_1036"(#loc964))
+#loc3055 = loc("sqrt_1037"(#loc965))
+#loc3056 = loc("reciprocal_1038"(#loc966))
+#loc3057 = loc("multiply_1039"(#loc967))
+#loc3058 = loc("multiply_1040"(#loc968))
+#loc3059 = loc("reshape_1041.dc.squeeze.0"(#loc969))
+#loc3060 = loc("matmul_1043"(#loc970))
+#loc3061 = loc("reshape_1044"(#loc971))
+#loc3062 = loc("transpose_1045"(#loc972))
+#loc3063 = loc("concatenate_1052"(#loc973))
+#loc3064 = loc("cosine_1053"(#loc974))
+#loc3065 = loc("unsqueeze_1054"(#loc975))
+#loc3066 = loc("multiply_1055"(#loc976))
+#loc3067 = loc("index_1056.dc.transpose.0"(#loc977))
+#loc3068 = loc("index_1056.dc.matmul.2"(#loc978))
+#loc3069 = loc("index_1056.dc.transpose.3"(#loc979))
+#loc3070 = loc("multiply_1057"(#loc980))
+#loc3071 = loc("index_1058.dc.transpose.0"(#loc981))
+#loc3072 = loc("index_1058.dc.matmul.2"(#loc982))
+#loc3073 = loc("index_1058.dc.transpose.3"(#loc983))
+#loc3074 = loc("concatenate_1059"(#loc984))
+#loc3075 = loc("sine_1060"(#loc985))
+#loc3076 = loc("unsqueeze_1061"(#loc986))
+#loc3077 = loc("multiply_1062"(#loc987))
+#loc3078 = loc("add_1063"(#loc988))
+#loc3079 = loc("reshape_1064.dc.squeeze.0"(#loc989))
+#loc3080 = loc("matmul_1066"(#loc990))
+#loc3081 = loc("reshape_1067"(#loc991))
+#loc3082 = loc("transpose_1068"(#loc992))
+#loc3083 = loc("multiply_1069"(#loc993))
+#loc3084 = loc("index_1070.dc.transpose.0"(#loc994))
+#loc3085 = loc("index_1070.dc.matmul.2"(#loc995))
+#loc3086 = loc("index_1070.dc.transpose.3"(#loc996))
+#loc3087 = loc("multiply_1071"(#loc997))
+#loc3088 = loc("index_1072.dc.transpose.0"(#loc998))
+#loc3089 = loc("index_1072.dc.matmul.2"(#loc999))
+#loc3090 = loc("index_1072.dc.transpose.3"(#loc1000))
+#loc3091 = loc("concatenate_1073"(#loc1001))
+#loc3092 = loc("multiply_1074"(#loc1002))
+#loc3093 = loc("add_1075"(#loc1003))
+#loc3094 = loc("reshape_1076.dc.squeeze.0"(#loc1004))
+#loc3095 = loc("transpose_1077"(#loc1005))
+#loc3096 = loc("matmul_1078"(#loc1006))
+#loc3097 = loc("reshape_1079.dc.unsqueeze.0"(#loc1007))
+#loc3098 = loc("multiply_1080"(#loc1008))
+#loc3099 = loc("add_1081"(#loc1009))
+#loc3100 = loc("softmax_1082"(#loc1010))
+#loc3101 = loc("reshape_1084.dc.squeeze.0"(#loc1011))
+#loc3102 = loc("matmul_1086"(#loc1012))
+#loc3103 = loc("reshape_1087"(#loc1013))
+#loc3104 = loc("transpose_1088"(#loc1014))
+#loc3105 = loc("transpose_1089"(#loc1015))
+#loc3106 = loc("reshape_1090.dc.squeeze.0"(#loc1016))
+#loc3107 = loc("transpose_1091"(#loc1017))
+#loc3108 = loc("matmul_1092"(#loc1018))
+#loc3109 = loc("reshape_1093.dc.unsqueeze.0"(#loc1019))
+#loc3110 = loc("transpose_1094"(#loc1020))
+#loc3111 = loc("reshape_1095"(#loc1021))
+#loc3112 = loc("matmul_1097"(#loc1022))
+#loc3113 = loc("reshape_1098.dc.unsqueeze.0"(#loc1023))
+#loc3114 = loc("add_1099"(#loc1024))
+#loc3115 = loc("multiply_1100"(#loc1025))
+#loc3116 = loc("reduce_avg_1101"(#loc1026))
+#loc3117 = loc("add_1102"(#loc1027))
+#loc3118 = loc("sqrt_1103"(#loc1028))
+#loc3119 = loc("reciprocal_1104"(#loc1029))
+#loc3120 = loc("multiply_1105"(#loc1030))
+#loc3121 = loc("multiply_1106"(#loc1031))
+#loc3122 = loc("reshape_1107.dc.squeeze.0"(#loc1032))
+#loc3123 = loc("matmul_1109"(#loc1033))
+#loc3124 = loc("reshape_1110.dc.unsqueeze.0"(#loc1034))
+#loc3125 = loc("sigmoid_1111"(#loc1035))
+#loc3126 = loc("multiply_1112"(#loc1036))
+#loc3127 = loc("matmul_1114"(#loc1037))
+#loc3128 = loc("reshape_1115.dc.unsqueeze.0"(#loc1038))
+#loc3129 = loc("multiply_1116"(#loc1039))
+#loc3130 = loc("matmul_1118"(#loc1040))
+#loc3131 = loc("add_1119"(#loc1041))
+#loc3132 = loc("multiply_1120"(#loc1042))
+#loc3133 = loc("reduce_avg_1121"(#loc1043))
+#loc3134 = loc("add_1122"(#loc1044))
+#loc3135 = loc("sqrt_1123"(#loc1045))
+#loc3136 = loc("reciprocal_1124"(#loc1046))
+#loc3137 = loc("multiply_1125"(#loc1047))
+#loc3138 = loc("multiply_1126"(#loc1048))
+#loc3139 = loc("reshape_1127.dc.squeeze.0"(#loc1049))
+#loc3140 = loc("matmul_1129"(#loc1050))
+#loc3141 = loc("reshape_1130"(#loc1051))
+#loc3142 = loc("transpose_1131"(#loc1052))
+#loc3143 = loc("concatenate_1138"(#loc1053))
+#loc3144 = loc("cosine_1139"(#loc1054))
+#loc3145 = loc("unsqueeze_1140"(#loc1055))
+#loc3146 = loc("multiply_1141"(#loc1056))
+#loc3147 = loc("index_1142.dc.transpose.0"(#loc1057))
+#loc3148 = loc("index_1142.dc.matmul.2"(#loc1058))
+#loc3149 = loc("index_1142.dc.transpose.3"(#loc1059))
+#loc3150 = loc("multiply_1143"(#loc1060))
+#loc3151 = loc("index_1144.dc.transpose.0"(#loc1061))
+#loc3152 = loc("index_1144.dc.matmul.2"(#loc1062))
+#loc3153 = loc("index_1144.dc.transpose.3"(#loc1063))
+#loc3154 = loc("concatenate_1145"(#loc1064))
+#loc3155 = loc("sine_1146"(#loc1065))
+#loc3156 = loc("unsqueeze_1147"(#loc1066))
+#loc3157 = loc("multiply_1148"(#loc1067))
+#loc3158 = loc("add_1149"(#loc1068))
+#loc3159 = loc("reshape_1150.dc.squeeze.0"(#loc1069))
+#loc3160 = loc("matmul_1152"(#loc1070))
+#loc3161 = loc("reshape_1153"(#loc1071))
+#loc3162 = loc("transpose_1154"(#loc1072))
+#loc3163 = loc("multiply_1155"(#loc1073))
+#loc3164 = loc("index_1156.dc.transpose.0"(#loc1074))
+#loc3165 = loc("index_1156.dc.matmul.2"(#loc1075))
+#loc3166 = loc("index_1156.dc.transpose.3"(#loc1076))
+#loc3167 = loc("multiply_1157"(#loc1077))
+#loc3168 = loc("index_1158.dc.transpose.0"(#loc1078))
+#loc3169 = loc("index_1158.dc.matmul.2"(#loc1079))
+#loc3170 = loc("index_1158.dc.transpose.3"(#loc1080))
+#loc3171 = loc("concatenate_1159"(#loc1081))
+#loc3172 = loc("multiply_1160"(#loc1082))
+#loc3173 = loc("add_1161"(#loc1083))
+#loc3174 = loc("reshape_1162.dc.squeeze.0"(#loc1084))
+#loc3175 = loc("transpose_1163"(#loc1085))
+#loc3176 = loc("matmul_1164"(#loc1086))
+#loc3177 = loc("reshape_1165.dc.unsqueeze.0"(#loc1087))
+#loc3178 = loc("multiply_1166"(#loc1088))
+#loc3179 = loc("add_1167"(#loc1089))
+#loc3180 = loc("softmax_1168"(#loc1090))
+#loc3181 = loc("reshape_1170.dc.squeeze.0"(#loc1091))
+#loc3182 = loc("matmul_1172"(#loc1092))
+#loc3183 = loc("reshape_1173"(#loc1093))
+#loc3184 = loc("transpose_1174"(#loc1094))
+#loc3185 = loc("transpose_1175"(#loc1095))
+#loc3186 = loc("reshape_1176.dc.squeeze.0"(#loc1096))
+#loc3187 = loc("transpose_1177"(#loc1097))
+#loc3188 = loc("matmul_1178"(#loc1098))
+#loc3189 = loc("reshape_1179.dc.unsqueeze.0"(#loc1099))
+#loc3190 = loc("transpose_1180"(#loc1100))
+#loc3191 = loc("reshape_1181"(#loc1101))
+#loc3192 = loc("matmul_1183"(#loc1102))
+#loc3193 = loc("reshape_1184.dc.unsqueeze.0"(#loc1103))
+#loc3194 = loc("add_1185"(#loc1104))
+#loc3195 = loc("multiply_1186"(#loc1105))
+#loc3196 = loc("reduce_avg_1187"(#loc1106))
+#loc3197 = loc("add_1188"(#loc1107))
+#loc3198 = loc("sqrt_1189"(#loc1108))
+#loc3199 = loc("reciprocal_1190"(#loc1109))
+#loc3200 = loc("multiply_1191"(#loc1110))
+#loc3201 = loc("multiply_1192"(#loc1111))
+#loc3202 = loc("reshape_1193.dc.squeeze.0"(#loc1112))
+#loc3203 = loc("matmul_1195"(#loc1113))
+#loc3204 = loc("reshape_1196.dc.unsqueeze.0"(#loc1114))
+#loc3205 = loc("sigmoid_1197"(#loc1115))
+#loc3206 = loc("multiply_1198"(#loc1116))
+#loc3207 = loc("matmul_1200"(#loc1117))
+#loc3208 = loc("reshape_1201.dc.unsqueeze.0"(#loc1118))
+#loc3209 = loc("multiply_1202"(#loc1119))
+#loc3210 = loc("matmul_1204"(#loc1120))
+#loc3211 = loc("add_1205"(#loc1121))
+#loc3212 = loc("multiply_1206"(#loc1122))
+#loc3213 = loc("reduce_avg_1207"(#loc1123))
+#loc3214 = loc("add_1208"(#loc1124))
+#loc3215 = loc("sqrt_1209"(#loc1125))
+#loc3216 = loc("reciprocal_1210"(#loc1126))
+#loc3217 = loc("multiply_1211"(#loc1127))
+#loc3218 = loc("multiply_1212"(#loc1128))
+#loc3219 = loc("reshape_1213.dc.squeeze.0"(#loc1129))
+#loc3220 = loc("matmul_1215"(#loc1130))
+#loc3221 = loc("reshape_1216"(#loc1131))
+#loc3222 = loc("transpose_1217"(#loc1132))
+#loc3223 = loc("concatenate_1224"(#loc1133))
+#loc3224 = loc("cosine_1225"(#loc1134))
+#loc3225 = loc("unsqueeze_1226"(#loc1135))
+#loc3226 = loc("multiply_1227"(#loc1136))
+#loc3227 = loc("index_1228.dc.transpose.0"(#loc1137))
+#loc3228 = loc("index_1228.dc.matmul.2"(#loc1138))
+#loc3229 = loc("index_1228.dc.transpose.3"(#loc1139))
+#loc3230 = loc("multiply_1229"(#loc1140))
+#loc3231 = loc("index_1230.dc.transpose.0"(#loc1141))
+#loc3232 = loc("index_1230.dc.matmul.2"(#loc1142))
+#loc3233 = loc("index_1230.dc.transpose.3"(#loc1143))
+#loc3234 = loc("concatenate_1231"(#loc1144))
+#loc3235 = loc("sine_1232"(#loc1145))
+#loc3236 = loc("unsqueeze_1233"(#loc1146))
+#loc3237 = loc("multiply_1234"(#loc1147))
+#loc3238 = loc("add_1235"(#loc1148))
+#loc3239 = loc("reshape_1236.dc.squeeze.0"(#loc1149))
+#loc3240 = loc("matmul_1238"(#loc1150))
+#loc3241 = loc("reshape_1239"(#loc1151))
+#loc3242 = loc("transpose_1240"(#loc1152))
+#loc3243 = loc("multiply_1241"(#loc1153))
+#loc3244 = loc("index_1242.dc.transpose.0"(#loc1154))
+#loc3245 = loc("index_1242.dc.matmul.2"(#loc1155))
+#loc3246 = loc("index_1242.dc.transpose.3"(#loc1156))
+#loc3247 = loc("multiply_1243"(#loc1157))
+#loc3248 = loc("index_1244.dc.transpose.0"(#loc1158))
+#loc3249 = loc("index_1244.dc.matmul.2"(#loc1159))
+#loc3250 = loc("index_1244.dc.transpose.3"(#loc1160))
+#loc3251 = loc("concatenate_1245"(#loc1161))
+#loc3252 = loc("multiply_1246"(#loc1162))
+#loc3253 = loc("add_1247"(#loc1163))
+#loc3254 = loc("reshape_1248.dc.squeeze.0"(#loc1164))
+#loc3255 = loc("transpose_1249"(#loc1165))
+#loc3256 = loc("matmul_1250"(#loc1166))
+#loc3257 = loc("reshape_1251.dc.unsqueeze.0"(#loc1167))
+#loc3258 = loc("multiply_1252"(#loc1168))
+#loc3259 = loc("add_1253"(#loc1169))
+#loc3260 = loc("softmax_1254"(#loc1170))
+#loc3261 = loc("reshape_1256.dc.squeeze.0"(#loc1171))
+#loc3262 = loc("matmul_1258"(#loc1172))
+#loc3263 = loc("reshape_1259"(#loc1173))
+#loc3264 = loc("transpose_1260"(#loc1174))
+#loc3265 = loc("transpose_1261"(#loc1175))
+#loc3266 = loc("reshape_1262.dc.squeeze.0"(#loc1176))
+#loc3267 = loc("transpose_1263"(#loc1177))
+#loc3268 = loc("matmul_1264"(#loc1178))
+#loc3269 = loc("reshape_1265.dc.unsqueeze.0"(#loc1179))
+#loc3270 = loc("transpose_1266"(#loc1180))
+#loc3271 = loc("reshape_1267"(#loc1181))
+#loc3272 = loc("matmul_1269"(#loc1182))
+#loc3273 = loc("reshape_1270.dc.unsqueeze.0"(#loc1183))
+#loc3274 = loc("add_1271"(#loc1184))
+#loc3275 = loc("multiply_1272"(#loc1185))
+#loc3276 = loc("reduce_avg_1273"(#loc1186))
+#loc3277 = loc("add_1274"(#loc1187))
+#loc3278 = loc("sqrt_1275"(#loc1188))
+#loc3279 = loc("reciprocal_1276"(#loc1189))
+#loc3280 = loc("multiply_1277"(#loc1190))
+#loc3281 = loc("multiply_1278"(#loc1191))
+#loc3282 = loc("reshape_1279.dc.squeeze.0"(#loc1192))
+#loc3283 = loc("matmul_1281"(#loc1193))
+#loc3284 = loc("reshape_1282.dc.unsqueeze.0"(#loc1194))
+#loc3285 = loc("sigmoid_1283"(#loc1195))
+#loc3286 = loc("multiply_1284"(#loc1196))
+#loc3287 = loc("matmul_1286"(#loc1197))
+#loc3288 = loc("reshape_1287.dc.unsqueeze.0"(#loc1198))
+#loc3289 = loc("multiply_1288"(#loc1199))
+#loc3290 = loc("matmul_1290"(#loc1200))
+#loc3291 = loc("add_1291"(#loc1201))
+#loc3292 = loc("multiply_1292"(#loc1202))
+#loc3293 = loc("reduce_avg_1293"(#loc1203))
+#loc3294 = loc("add_1294"(#loc1204))
+#loc3295 = loc("sqrt_1295"(#loc1205))
+#loc3296 = loc("reciprocal_1296"(#loc1206))
+#loc3297 = loc("multiply_1297"(#loc1207))
+#loc3298 = loc("multiply_1298"(#loc1208))
+#loc3299 = loc("reshape_1299.dc.squeeze.0"(#loc1209))
+#loc3300 = loc("matmul_1301"(#loc1210))
+#loc3301 = loc("reshape_1302"(#loc1211))
+#loc3302 = loc("transpose_1303"(#loc1212))
+#loc3303 = loc("concatenate_1310"(#loc1213))
+#loc3304 = loc("cosine_1311"(#loc1214))
+#loc3305 = loc("unsqueeze_1312"(#loc1215))
+#loc3306 = loc("multiply_1313"(#loc1216))
+#loc3307 = loc("index_1314.dc.transpose.0"(#loc1217))
+#loc3308 = loc("index_1314.dc.matmul.2"(#loc1218))
+#loc3309 = loc("index_1314.dc.transpose.3"(#loc1219))
+#loc3310 = loc("multiply_1315"(#loc1220))
+#loc3311 = loc("index_1316.dc.transpose.0"(#loc1221))
+#loc3312 = loc("index_1316.dc.matmul.2"(#loc1222))
+#loc3313 = loc("index_1316.dc.transpose.3"(#loc1223))
+#loc3314 = loc("concatenate_1317"(#loc1224))
+#loc3315 = loc("sine_1318"(#loc1225))
+#loc3316 = loc("unsqueeze_1319"(#loc1226))
+#loc3317 = loc("multiply_1320"(#loc1227))
+#loc3318 = loc("add_1321"(#loc1228))
+#loc3319 = loc("reshape_1322.dc.squeeze.0"(#loc1229))
+#loc3320 = loc("matmul_1324"(#loc1230))
+#loc3321 = loc("reshape_1325"(#loc1231))
+#loc3322 = loc("transpose_1326"(#loc1232))
+#loc3323 = loc("multiply_1327"(#loc1233))
+#loc3324 = loc("index_1328.dc.transpose.0"(#loc1234))
+#loc3325 = loc("index_1328.dc.matmul.2"(#loc1235))
+#loc3326 = loc("index_1328.dc.transpose.3"(#loc1236))
+#loc3327 = loc("multiply_1329"(#loc1237))
+#loc3328 = loc("index_1330.dc.transpose.0"(#loc1238))
+#loc3329 = loc("index_1330.dc.matmul.2"(#loc1239))
+#loc3330 = loc("index_1330.dc.transpose.3"(#loc1240))
+#loc3331 = loc("concatenate_1331"(#loc1241))
+#loc3332 = loc("multiply_1332"(#loc1242))
+#loc3333 = loc("add_1333"(#loc1243))
+#loc3334 = loc("reshape_1334.dc.squeeze.0"(#loc1244))
+#loc3335 = loc("transpose_1335"(#loc1245))
+#loc3336 = loc("matmul_1336"(#loc1246))
+#loc3337 = loc("reshape_1337.dc.unsqueeze.0"(#loc1247))
+#loc3338 = loc("multiply_1338"(#loc1248))
+#loc3339 = loc("add_1339"(#loc1249))
+#loc3340 = loc("softmax_1340"(#loc1250))
+#loc3341 = loc("reshape_1342.dc.squeeze.0"(#loc1251))
+#loc3342 = loc("matmul_1344"(#loc1252))
+#loc3343 = loc("reshape_1345"(#loc1253))
+#loc3344 = loc("transpose_1346"(#loc1254))
+#loc3345 = loc("transpose_1347"(#loc1255))
+#loc3346 = loc("reshape_1348.dc.squeeze.0"(#loc1256))
+#loc3347 = loc("transpose_1349"(#loc1257))
+#loc3348 = loc("matmul_1350"(#loc1258))
+#loc3349 = loc("reshape_1351.dc.unsqueeze.0"(#loc1259))
+#loc3350 = loc("transpose_1352"(#loc1260))
+#loc3351 = loc("reshape_1353"(#loc1261))
+#loc3352 = loc("matmul_1355"(#loc1262))
+#loc3353 = loc("reshape_1356.dc.unsqueeze.0"(#loc1263))
+#loc3354 = loc("add_1357"(#loc1264))
+#loc3355 = loc("multiply_1358"(#loc1265))
+#loc3356 = loc("reduce_avg_1359"(#loc1266))
+#loc3357 = loc("add_1360"(#loc1267))
+#loc3358 = loc("sqrt_1361"(#loc1268))
+#loc3359 = loc("reciprocal_1362"(#loc1269))
+#loc3360 = loc("multiply_1363"(#loc1270))
+#loc3361 = loc("multiply_1364"(#loc1271))
+#loc3362 = loc("reshape_1365.dc.squeeze.0"(#loc1272))
+#loc3363 = loc("matmul_1367"(#loc1273))
+#loc3364 = loc("reshape_1368.dc.unsqueeze.0"(#loc1274))
+#loc3365 = loc("sigmoid_1369"(#loc1275))
+#loc3366 = loc("multiply_1370"(#loc1276))
+#loc3367 = loc("matmul_1372"(#loc1277))
+#loc3368 = loc("reshape_1373.dc.unsqueeze.0"(#loc1278))
+#loc3369 = loc("multiply_1374"(#loc1279))
+#loc3370 = loc("matmul_1376"(#loc1280))
+#loc3371 = loc("add_1377"(#loc1281))
+#loc3372 = loc("multiply_1378"(#loc1282))
+#loc3373 = loc("reduce_avg_1379"(#loc1283))
+#loc3374 = loc("add_1380"(#loc1284))
+#loc3375 = loc("sqrt_1381"(#loc1285))
+#loc3376 = loc("reciprocal_1382"(#loc1286))
+#loc3377 = loc("multiply_1383"(#loc1287))
+#loc3378 = loc("multiply_1384"(#loc1288))
+#loc3379 = loc("reshape_1385.dc.squeeze.0"(#loc1289))
+#loc3380 = loc("matmul_1387"(#loc1290))
+#loc3381 = loc("reshape_1388"(#loc1291))
+#loc3382 = loc("transpose_1389"(#loc1292))
+#loc3383 = loc("concatenate_1396"(#loc1293))
+#loc3384 = loc("cosine_1397"(#loc1294))
+#loc3385 = loc("unsqueeze_1398"(#loc1295))
+#loc3386 = loc("multiply_1399"(#loc1296))
+#loc3387 = loc("index_1400.dc.transpose.0"(#loc1297))
+#loc3388 = loc("index_1400.dc.matmul.2"(#loc1298))
+#loc3389 = loc("index_1400.dc.transpose.3"(#loc1299))
+#loc3390 = loc("multiply_1401"(#loc1300))
+#loc3391 = loc("index_1402.dc.transpose.0"(#loc1301))
+#loc3392 = loc("index_1402.dc.matmul.2"(#loc1302))
+#loc3393 = loc("index_1402.dc.transpose.3"(#loc1303))
+#loc3394 = loc("concatenate_1403"(#loc1304))
+#loc3395 = loc("sine_1404"(#loc1305))
+#loc3396 = loc("unsqueeze_1405"(#loc1306))
+#loc3397 = loc("multiply_1406"(#loc1307))
+#loc3398 = loc("add_1407"(#loc1308))
+#loc3399 = loc("reshape_1408.dc.squeeze.0"(#loc1309))
+#loc3400 = loc("matmul_1410"(#loc1310))
+#loc3401 = loc("reshape_1411"(#loc1311))
+#loc3402 = loc("transpose_1412"(#loc1312))
+#loc3403 = loc("multiply_1413"(#loc1313))
+#loc3404 = loc("index_1414.dc.transpose.0"(#loc1314))
+#loc3405 = loc("index_1414.dc.matmul.2"(#loc1315))
+#loc3406 = loc("index_1414.dc.transpose.3"(#loc1316))
+#loc3407 = loc("multiply_1415"(#loc1317))
+#loc3408 = loc("index_1416.dc.transpose.0"(#loc1318))
+#loc3409 = loc("index_1416.dc.matmul.2"(#loc1319))
+#loc3410 = loc("index_1416.dc.transpose.3"(#loc1320))
+#loc3411 = loc("concatenate_1417"(#loc1321))
+#loc3412 = loc("multiply_1418"(#loc1322))
+#loc3413 = loc("add_1419"(#loc1323))
+#loc3414 = loc("reshape_1420.dc.squeeze.0"(#loc1324))
+#loc3415 = loc("transpose_1421"(#loc1325))
+#loc3416 = loc("matmul_1422"(#loc1326))
+#loc3417 = loc("reshape_1423.dc.unsqueeze.0"(#loc1327))
+#loc3418 = loc("multiply_1424"(#loc1328))
+#loc3419 = loc("add_1425"(#loc1329))
+#loc3420 = loc("softmax_1426"(#loc1330))
+#loc3421 = loc("reshape_1428.dc.squeeze.0"(#loc1331))
+#loc3422 = loc("matmul_1430"(#loc1332))
+#loc3423 = loc("reshape_1431"(#loc1333))
+#loc3424 = loc("transpose_1432"(#loc1334))
+#loc3425 = loc("transpose_1433"(#loc1335))
+#loc3426 = loc("reshape_1434.dc.squeeze.0"(#loc1336))
+#loc3427 = loc("transpose_1435"(#loc1337))
+#loc3428 = loc("matmul_1436"(#loc1338))
+#loc3429 = loc("reshape_1437.dc.unsqueeze.0"(#loc1339))
+#loc3430 = loc("transpose_1438"(#loc1340))
+#loc3431 = loc("reshape_1439"(#loc1341))
+#loc3432 = loc("matmul_1441"(#loc1342))
+#loc3433 = loc("reshape_1442.dc.unsqueeze.0"(#loc1343))
+#loc3434 = loc("add_1443"(#loc1344))
+#loc3435 = loc("multiply_1444"(#loc1345))
+#loc3436 = loc("reduce_avg_1445"(#loc1346))
+#loc3437 = loc("add_1446"(#loc1347))
+#loc3438 = loc("sqrt_1447"(#loc1348))
+#loc3439 = loc("reciprocal_1448"(#loc1349))
+#loc3440 = loc("multiply_1449"(#loc1350))
+#loc3441 = loc("multiply_1450"(#loc1351))
+#loc3442 = loc("reshape_1451.dc.squeeze.0"(#loc1352))
+#loc3443 = loc("matmul_1453"(#loc1353))
+#loc3444 = loc("reshape_1454.dc.unsqueeze.0"(#loc1354))
+#loc3445 = loc("sigmoid_1455"(#loc1355))
+#loc3446 = loc("multiply_1456"(#loc1356))
+#loc3447 = loc("matmul_1458"(#loc1357))
+#loc3448 = loc("reshape_1459.dc.unsqueeze.0"(#loc1358))
+#loc3449 = loc("multiply_1460"(#loc1359))
+#loc3450 = loc("matmul_1462"(#loc1360))
+#loc3451 = loc("add_1463"(#loc1361))
+#loc3452 = loc("multiply_1464"(#loc1362))
+#loc3453 = loc("reduce_avg_1465"(#loc1363))
+#loc3454 = loc("add_1466"(#loc1364))
+#loc3455 = loc("sqrt_1467"(#loc1365))
+#loc3456 = loc("reciprocal_1468"(#loc1366))
+#loc3457 = loc("multiply_1469"(#loc1367))
+#loc3458 = loc("multiply_1470"(#loc1368))
+#loc3459 = loc("reshape_1471.dc.squeeze.0"(#loc1369))
+#loc3460 = loc("matmul_1473"(#loc1370))
+#loc3461 = loc("reshape_1474"(#loc1371))
+#loc3462 = loc("transpose_1475"(#loc1372))
+#loc3463 = loc("concatenate_1482"(#loc1373))
+#loc3464 = loc("cosine_1483"(#loc1374))
+#loc3465 = loc("unsqueeze_1484"(#loc1375))
+#loc3466 = loc("multiply_1485"(#loc1376))
+#loc3467 = loc("index_1486.dc.transpose.0"(#loc1377))
+#loc3468 = loc("index_1486.dc.matmul.2"(#loc1378))
+#loc3469 = loc("index_1486.dc.transpose.3"(#loc1379))
+#loc3470 = loc("multiply_1487"(#loc1380))
+#loc3471 = loc("index_1488.dc.transpose.0"(#loc1381))
+#loc3472 = loc("index_1488.dc.matmul.2"(#loc1382))
+#loc3473 = loc("index_1488.dc.transpose.3"(#loc1383))
+#loc3474 = loc("concatenate_1489"(#loc1384))
+#loc3475 = loc("sine_1490"(#loc1385))
+#loc3476 = loc("unsqueeze_1491"(#loc1386))
+#loc3477 = loc("multiply_1492"(#loc1387))
+#loc3478 = loc("add_1493"(#loc1388))
+#loc3479 = loc("reshape_1494.dc.squeeze.0"(#loc1389))
+#loc3480 = loc("matmul_1496"(#loc1390))
+#loc3481 = loc("reshape_1497"(#loc1391))
+#loc3482 = loc("transpose_1498"(#loc1392))
+#loc3483 = loc("multiply_1499"(#loc1393))
+#loc3484 = loc("index_1500.dc.transpose.0"(#loc1394))
+#loc3485 = loc("index_1500.dc.matmul.2"(#loc1395))
+#loc3486 = loc("index_1500.dc.transpose.3"(#loc1396))
+#loc3487 = loc("multiply_1501"(#loc1397))
+#loc3488 = loc("index_1502.dc.transpose.0"(#loc1398))
+#loc3489 = loc("index_1502.dc.matmul.2"(#loc1399))
+#loc3490 = loc("index_1502.dc.transpose.3"(#loc1400))
+#loc3491 = loc("concatenate_1503"(#loc1401))
+#loc3492 = loc("multiply_1504"(#loc1402))
+#loc3493 = loc("add_1505"(#loc1403))
+#loc3494 = loc("reshape_1506.dc.squeeze.0"(#loc1404))
+#loc3495 = loc("transpose_1507"(#loc1405))
+#loc3496 = loc("matmul_1508"(#loc1406))
+#loc3497 = loc("reshape_1509.dc.unsqueeze.0"(#loc1407))
+#loc3498 = loc("multiply_1510"(#loc1408))
+#loc3499 = loc("add_1511"(#loc1409))
+#loc3500 = loc("softmax_1512"(#loc1410))
+#loc3501 = loc("reshape_1514.dc.squeeze.0"(#loc1411))
+#loc3502 = loc("matmul_1516"(#loc1412))
+#loc3503 = loc("reshape_1517"(#loc1413))
+#loc3504 = loc("transpose_1518"(#loc1414))
+#loc3505 = loc("transpose_1519"(#loc1415))
+#loc3506 = loc("reshape_1520.dc.squeeze.0"(#loc1416))
+#loc3507 = loc("transpose_1521"(#loc1417))
+#loc3508 = loc("matmul_1522"(#loc1418))
+#loc3509 = loc("reshape_1523.dc.unsqueeze.0"(#loc1419))
+#loc3510 = loc("transpose_1524"(#loc1420))
+#loc3511 = loc("reshape_1525"(#loc1421))
+#loc3512 = loc("matmul_1527"(#loc1422))
+#loc3513 = loc("reshape_1528.dc.unsqueeze.0"(#loc1423))
+#loc3514 = loc("add_1529"(#loc1424))
+#loc3515 = loc("multiply_1530"(#loc1425))
+#loc3516 = loc("reduce_avg_1531"(#loc1426))
+#loc3517 = loc("add_1532"(#loc1427))
+#loc3518 = loc("sqrt_1533"(#loc1428))
+#loc3519 = loc("reciprocal_1534"(#loc1429))
+#loc3520 = loc("multiply_1535"(#loc1430))
+#loc3521 = loc("multiply_1536"(#loc1431))
+#loc3522 = loc("reshape_1537.dc.squeeze.0"(#loc1432))
+#loc3523 = loc("matmul_1539"(#loc1433))
+#loc3524 = loc("reshape_1540.dc.unsqueeze.0"(#loc1434))
+#loc3525 = loc("sigmoid_1541"(#loc1435))
+#loc3526 = loc("multiply_1542"(#loc1436))
+#loc3527 = loc("matmul_1544"(#loc1437))
+#loc3528 = loc("reshape_1545.dc.unsqueeze.0"(#loc1438))
+#loc3529 = loc("multiply_1546"(#loc1439))
+#loc3530 = loc("matmul_1548"(#loc1440))
+#loc3531 = loc("add_1549"(#loc1441))
+#loc3532 = loc("multiply_1550"(#loc1442))
+#loc3533 = loc("reduce_avg_1551"(#loc1443))
+#loc3534 = loc("add_1552"(#loc1444))
+#loc3535 = loc("sqrt_1553"(#loc1445))
+#loc3536 = loc("reciprocal_1554"(#loc1446))
+#loc3537 = loc("multiply_1555"(#loc1447))
+#loc3538 = loc("multiply_1556"(#loc1448))
+#loc3539 = loc("reshape_1557.dc.squeeze.0"(#loc1449))
+#loc3540 = loc("matmul_1559"(#loc1450))
+#loc3541 = loc("reshape_1560"(#loc1451))
+#loc3542 = loc("transpose_1561"(#loc1452))
+#loc3543 = loc("concatenate_1568"(#loc1453))
+#loc3544 = loc("cosine_1569"(#loc1454))
+#loc3545 = loc("unsqueeze_1570"(#loc1455))
+#loc3546 = loc("multiply_1571"(#loc1456))
+#loc3547 = loc("index_1572.dc.transpose.0"(#loc1457))
+#loc3548 = loc("index_1572.dc.matmul.2"(#loc1458))
+#loc3549 = loc("index_1572.dc.transpose.3"(#loc1459))
+#loc3550 = loc("multiply_1573"(#loc1460))
+#loc3551 = loc("index_1574.dc.transpose.0"(#loc1461))
+#loc3552 = loc("index_1574.dc.matmul.2"(#loc1462))
+#loc3553 = loc("index_1574.dc.transpose.3"(#loc1463))
+#loc3554 = loc("concatenate_1575"(#loc1464))
+#loc3555 = loc("sine_1576"(#loc1465))
+#loc3556 = loc("unsqueeze_1577"(#loc1466))
+#loc3557 = loc("multiply_1578"(#loc1467))
+#loc3558 = loc("add_1579"(#loc1468))
+#loc3559 = loc("reshape_1580.dc.squeeze.0"(#loc1469))
+#loc3560 = loc("matmul_1582"(#loc1470))
+#loc3561 = loc("reshape_1583"(#loc1471))
+#loc3562 = loc("transpose_1584"(#loc1472))
+#loc3563 = loc("multiply_1585"(#loc1473))
+#loc3564 = loc("index_1586.dc.transpose.0"(#loc1474))
+#loc3565 = loc("index_1586.dc.matmul.2"(#loc1475))
+#loc3566 = loc("index_1586.dc.transpose.3"(#loc1476))
+#loc3567 = loc("multiply_1587"(#loc1477))
+#loc3568 = loc("index_1588.dc.transpose.0"(#loc1478))
+#loc3569 = loc("index_1588.dc.matmul.2"(#loc1479))
+#loc3570 = loc("index_1588.dc.transpose.3"(#loc1480))
+#loc3571 = loc("concatenate_1589"(#loc1481))
+#loc3572 = loc("multiply_1590"(#loc1482))
+#loc3573 = loc("add_1591"(#loc1483))
+#loc3574 = loc("reshape_1592.dc.squeeze.0"(#loc1484))
+#loc3575 = loc("transpose_1593"(#loc1485))
+#loc3576 = loc("matmul_1594"(#loc1486))
+#loc3577 = loc("reshape_1595.dc.unsqueeze.0"(#loc1487))
+#loc3578 = loc("multiply_1596"(#loc1488))
+#loc3579 = loc("add_1597"(#loc1489))
+#loc3580 = loc("softmax_1598"(#loc1490))
+#loc3581 = loc("reshape_1600.dc.squeeze.0"(#loc1491))
+#loc3582 = loc("matmul_1602"(#loc1492))
+#loc3583 = loc("reshape_1603"(#loc1493))
+#loc3584 = loc("transpose_1604"(#loc1494))
+#loc3585 = loc("transpose_1605"(#loc1495))
+#loc3586 = loc("reshape_1606.dc.squeeze.0"(#loc1496))
+#loc3587 = loc("transpose_1607"(#loc1497))
+#loc3588 = loc("matmul_1608"(#loc1498))
+#loc3589 = loc("reshape_1609.dc.unsqueeze.0"(#loc1499))
+#loc3590 = loc("transpose_1610"(#loc1500))
+#loc3591 = loc("reshape_1611"(#loc1501))
+#loc3592 = loc("matmul_1613"(#loc1502))
+#loc3593 = loc("reshape_1614.dc.unsqueeze.0"(#loc1503))
+#loc3594 = loc("add_1615"(#loc1504))
+#loc3595 = loc("multiply_1616"(#loc1505))
+#loc3596 = loc("reduce_avg_1617"(#loc1506))
+#loc3597 = loc("add_1618"(#loc1507))
+#loc3598 = loc("sqrt_1619"(#loc1508))
+#loc3599 = loc("reciprocal_1620"(#loc1509))
+#loc3600 = loc("multiply_1621"(#loc1510))
+#loc3601 = loc("multiply_1622"(#loc1511))
+#loc3602 = loc("reshape_1623.dc.squeeze.0"(#loc1512))
+#loc3603 = loc("matmul_1625"(#loc1513))
+#loc3604 = loc("reshape_1626.dc.unsqueeze.0"(#loc1514))
+#loc3605 = loc("sigmoid_1627"(#loc1515))
+#loc3606 = loc("multiply_1628"(#loc1516))
+#loc3607 = loc("matmul_1630"(#loc1517))
+#loc3608 = loc("reshape_1631.dc.unsqueeze.0"(#loc1518))
+#loc3609 = loc("multiply_1632"(#loc1519))
+#loc3610 = loc("matmul_1634"(#loc1520))
+#loc3611 = loc("add_1635"(#loc1521))
+#loc3612 = loc("multiply_1636"(#loc1522))
+#loc3613 = loc("reduce_avg_1637"(#loc1523))
+#loc3614 = loc("add_1638"(#loc1524))
+#loc3615 = loc("sqrt_1639"(#loc1525))
+#loc3616 = loc("reciprocal_1640"(#loc1526))
+#loc3617 = loc("multiply_1641"(#loc1527))
+#loc3618 = loc("multiply_1642"(#loc1528))
+#loc3619 = loc("reshape_1643.dc.squeeze.0"(#loc1529))
+#loc3620 = loc("matmul_1645"(#loc1530))
+#loc3621 = loc("reshape_1646"(#loc1531))
+#loc3622 = loc("transpose_1647"(#loc1532))
+#loc3623 = loc("concatenate_1654"(#loc1533))
+#loc3624 = loc("cosine_1655"(#loc1534))
+#loc3625 = loc("unsqueeze_1656"(#loc1535))
+#loc3626 = loc("multiply_1657"(#loc1536))
+#loc3627 = loc("index_1658.dc.transpose.0"(#loc1537))
+#loc3628 = loc("index_1658.dc.matmul.2"(#loc1538))
+#loc3629 = loc("index_1658.dc.transpose.3"(#loc1539))
+#loc3630 = loc("multiply_1659"(#loc1540))
+#loc3631 = loc("index_1660.dc.transpose.0"(#loc1541))
+#loc3632 = loc("index_1660.dc.matmul.2"(#loc1542))
+#loc3633 = loc("index_1660.dc.transpose.3"(#loc1543))
+#loc3634 = loc("concatenate_1661"(#loc1544))
+#loc3635 = loc("sine_1662"(#loc1545))
+#loc3636 = loc("unsqueeze_1663"(#loc1546))
+#loc3637 = loc("multiply_1664"(#loc1547))
+#loc3638 = loc("add_1665"(#loc1548))
+#loc3639 = loc("reshape_1666.dc.squeeze.0"(#loc1549))
+#loc3640 = loc("matmul_1668"(#loc1550))
+#loc3641 = loc("reshape_1669"(#loc1551))
+#loc3642 = loc("transpose_1670"(#loc1552))
+#loc3643 = loc("multiply_1671"(#loc1553))
+#loc3644 = loc("index_1672.dc.transpose.0"(#loc1554))
+#loc3645 = loc("index_1672.dc.matmul.2"(#loc1555))
+#loc3646 = loc("index_1672.dc.transpose.3"(#loc1556))
+#loc3647 = loc("multiply_1673"(#loc1557))
+#loc3648 = loc("index_1674.dc.transpose.0"(#loc1558))
+#loc3649 = loc("index_1674.dc.matmul.2"(#loc1559))
+#loc3650 = loc("index_1674.dc.transpose.3"(#loc1560))
+#loc3651 = loc("concatenate_1675"(#loc1561))
+#loc3652 = loc("multiply_1676"(#loc1562))
+#loc3653 = loc("add_1677"(#loc1563))
+#loc3654 = loc("reshape_1678.dc.squeeze.0"(#loc1564))
+#loc3655 = loc("transpose_1679"(#loc1565))
+#loc3656 = loc("matmul_1680"(#loc1566))
+#loc3657 = loc("reshape_1681.dc.unsqueeze.0"(#loc1567))
+#loc3658 = loc("multiply_1682"(#loc1568))
+#loc3659 = loc("add_1683"(#loc1569))
+#loc3660 = loc("softmax_1684"(#loc1570))
+#loc3661 = loc("reshape_1686.dc.squeeze.0"(#loc1571))
+#loc3662 = loc("matmul_1688"(#loc1572))
+#loc3663 = loc("reshape_1689"(#loc1573))
+#loc3664 = loc("transpose_1690"(#loc1574))
+#loc3665 = loc("transpose_1691"(#loc1575))
+#loc3666 = loc("reshape_1692.dc.squeeze.0"(#loc1576))
+#loc3667 = loc("transpose_1693"(#loc1577))
+#loc3668 = loc("matmul_1694"(#loc1578))
+#loc3669 = loc("reshape_1695.dc.unsqueeze.0"(#loc1579))
+#loc3670 = loc("transpose_1696"(#loc1580))
+#loc3671 = loc("reshape_1697"(#loc1581))
+#loc3672 = loc("matmul_1699"(#loc1582))
+#loc3673 = loc("reshape_1700.dc.unsqueeze.0"(#loc1583))
+#loc3674 = loc("add_1701"(#loc1584))
+#loc3675 = loc("multiply_1702"(#loc1585))
+#loc3676 = loc("reduce_avg_1703"(#loc1586))
+#loc3677 = loc("add_1704"(#loc1587))
+#loc3678 = loc("sqrt_1705"(#loc1588))
+#loc3679 = loc("reciprocal_1706"(#loc1589))
+#loc3680 = loc("multiply_1707"(#loc1590))
+#loc3681 = loc("multiply_1708"(#loc1591))
+#loc3682 = loc("reshape_1709.dc.squeeze.0"(#loc1592))
+#loc3683 = loc("matmul_1711"(#loc1593))
+#loc3684 = loc("reshape_1712.dc.unsqueeze.0"(#loc1594))
+#loc3685 = loc("sigmoid_1713"(#loc1595))
+#loc3686 = loc("multiply_1714"(#loc1596))
+#loc3687 = loc("matmul_1716"(#loc1597))
+#loc3688 = loc("reshape_1717.dc.unsqueeze.0"(#loc1598))
+#loc3689 = loc("multiply_1718"(#loc1599))
+#loc3690 = loc("matmul_1720"(#loc1600))
+#loc3691 = loc("add_1721"(#loc1601))
+#loc3692 = loc("multiply_1722"(#loc1602))
+#loc3693 = loc("reduce_avg_1723"(#loc1603))
+#loc3694 = loc("add_1724"(#loc1604))
+#loc3695 = loc("sqrt_1725"(#loc1605))
+#loc3696 = loc("reciprocal_1726"(#loc1606))
+#loc3697 = loc("multiply_1727"(#loc1607))
+#loc3698 = loc("multiply_1728"(#loc1608))
+#loc3699 = loc("reshape_1729.dc.squeeze.0"(#loc1609))
+#loc3700 = loc("matmul_1731"(#loc1610))
+#loc3701 = loc("reshape_1732"(#loc1611))
+#loc3702 = loc("transpose_1733"(#loc1612))
+#loc3703 = loc("concatenate_1740"(#loc1613))
+#loc3704 = loc("cosine_1741"(#loc1614))
+#loc3705 = loc("unsqueeze_1742"(#loc1615))
+#loc3706 = loc("multiply_1743"(#loc1616))
+#loc3707 = loc("index_1744.dc.transpose.0"(#loc1617))
+#loc3708 = loc("index_1744.dc.matmul.2"(#loc1618))
+#loc3709 = loc("index_1744.dc.transpose.3"(#loc1619))
+#loc3710 = loc("multiply_1745"(#loc1620))
+#loc3711 = loc("index_1746.dc.transpose.0"(#loc1621))
+#loc3712 = loc("index_1746.dc.matmul.2"(#loc1622))
+#loc3713 = loc("index_1746.dc.transpose.3"(#loc1623))
+#loc3714 = loc("concatenate_1747"(#loc1624))
+#loc3715 = loc("sine_1748"(#loc1625))
+#loc3716 = loc("unsqueeze_1749"(#loc1626))
+#loc3717 = loc("multiply_1750"(#loc1627))
+#loc3718 = loc("add_1751"(#loc1628))
+#loc3719 = loc("reshape_1752.dc.squeeze.0"(#loc1629))
+#loc3720 = loc("matmul_1754"(#loc1630))
+#loc3721 = loc("reshape_1755"(#loc1631))
+#loc3722 = loc("transpose_1756"(#loc1632))
+#loc3723 = loc("multiply_1757"(#loc1633))
+#loc3724 = loc("index_1758.dc.transpose.0"(#loc1634))
+#loc3725 = loc("index_1758.dc.matmul.2"(#loc1635))
+#loc3726 = loc("index_1758.dc.transpose.3"(#loc1636))
+#loc3727 = loc("multiply_1759"(#loc1637))
+#loc3728 = loc("index_1760.dc.transpose.0"(#loc1638))
+#loc3729 = loc("index_1760.dc.matmul.2"(#loc1639))
+#loc3730 = loc("index_1760.dc.transpose.3"(#loc1640))
+#loc3731 = loc("concatenate_1761"(#loc1641))
+#loc3732 = loc("multiply_1762"(#loc1642))
+#loc3733 = loc("add_1763"(#loc1643))
+#loc3734 = loc("reshape_1764.dc.squeeze.0"(#loc1644))
+#loc3735 = loc("transpose_1765"(#loc1645))
+#loc3736 = loc("matmul_1766"(#loc1646))
+#loc3737 = loc("reshape_1767.dc.unsqueeze.0"(#loc1647))
+#loc3738 = loc("multiply_1768"(#loc1648))
+#loc3739 = loc("add_1769"(#loc1649))
+#loc3740 = loc("softmax_1770"(#loc1650))
+#loc3741 = loc("reshape_1772.dc.squeeze.0"(#loc1651))
+#loc3742 = loc("matmul_1774"(#loc1652))
+#loc3743 = loc("reshape_1775"(#loc1653))
+#loc3744 = loc("transpose_1776"(#loc1654))
+#loc3745 = loc("transpose_1777"(#loc1655))
+#loc3746 = loc("reshape_1778.dc.squeeze.0"(#loc1656))
+#loc3747 = loc("transpose_1779"(#loc1657))
+#loc3748 = loc("matmul_1780"(#loc1658))
+#loc3749 = loc("reshape_1781.dc.unsqueeze.0"(#loc1659))
+#loc3750 = loc("transpose_1782"(#loc1660))
+#loc3751 = loc("reshape_1783"(#loc1661))
+#loc3752 = loc("matmul_1785"(#loc1662))
+#loc3753 = loc("reshape_1786.dc.unsqueeze.0"(#loc1663))
+#loc3754 = loc("add_1787"(#loc1664))
+#loc3755 = loc("multiply_1788"(#loc1665))
+#loc3756 = loc("reduce_avg_1789"(#loc1666))
+#loc3757 = loc("add_1790"(#loc1667))
+#loc3758 = loc("sqrt_1791"(#loc1668))
+#loc3759 = loc("reciprocal_1792"(#loc1669))
+#loc3760 = loc("multiply_1793"(#loc1670))
+#loc3761 = loc("multiply_1794"(#loc1671))
+#loc3762 = loc("reshape_1795.dc.squeeze.0"(#loc1672))
+#loc3763 = loc("matmul_1797"(#loc1673))
+#loc3764 = loc("reshape_1798.dc.unsqueeze.0"(#loc1674))
+#loc3765 = loc("sigmoid_1799"(#loc1675))
+#loc3766 = loc("multiply_1800"(#loc1676))
+#loc3767 = loc("matmul_1802"(#loc1677))
+#loc3768 = loc("reshape_1803.dc.unsqueeze.0"(#loc1678))
+#loc3769 = loc("multiply_1804"(#loc1679))
+#loc3770 = loc("matmul_1806"(#loc1680))
+#loc3771 = loc("add_1807"(#loc1681))
+#loc3772 = loc("multiply_1808"(#loc1682))
+#loc3773 = loc("reduce_avg_1809"(#loc1683))
+#loc3774 = loc("add_1810"(#loc1684))
+#loc3775 = loc("sqrt_1811"(#loc1685))
+#loc3776 = loc("reciprocal_1812"(#loc1686))
+#loc3777 = loc("multiply_1813"(#loc1687))
+#loc3778 = loc("multiply_1814"(#loc1688))
+#loc3779 = loc("reshape_1815.dc.squeeze.0"(#loc1689))
+#loc3780 = loc("matmul_1817"(#loc1690))
+#loc3781 = loc("reshape_1818"(#loc1691))
+#loc3782 = loc("transpose_1819"(#loc1692))
+#loc3783 = loc("concatenate_1826"(#loc1693))
+#loc3784 = loc("cosine_1827"(#loc1694))
+#loc3785 = loc("unsqueeze_1828"(#loc1695))
+#loc3786 = loc("multiply_1829"(#loc1696))
+#loc3787 = loc("index_1830.dc.transpose.0"(#loc1697))
+#loc3788 = loc("index_1830.dc.matmul.2"(#loc1698))
+#loc3789 = loc("index_1830.dc.transpose.3"(#loc1699))
+#loc3790 = loc("multiply_1831"(#loc1700))
+#loc3791 = loc("index_1832.dc.transpose.0"(#loc1701))
+#loc3792 = loc("index_1832.dc.matmul.2"(#loc1702))
+#loc3793 = loc("index_1832.dc.transpose.3"(#loc1703))
+#loc3794 = loc("concatenate_1833"(#loc1704))
+#loc3795 = loc("sine_1834"(#loc1705))
+#loc3796 = loc("unsqueeze_1835"(#loc1706))
+#loc3797 = loc("multiply_1836"(#loc1707))
+#loc3798 = loc("add_1837"(#loc1708))
+#loc3799 = loc("reshape_1838.dc.squeeze.0"(#loc1709))
+#loc3800 = loc("matmul_1840"(#loc1710))
+#loc3801 = loc("reshape_1841"(#loc1711))
+#loc3802 = loc("transpose_1842"(#loc1712))
+#loc3803 = loc("multiply_1843"(#loc1713))
+#loc3804 = loc("index_1844.dc.transpose.0"(#loc1714))
+#loc3805 = loc("index_1844.dc.matmul.2"(#loc1715))
+#loc3806 = loc("index_1844.dc.transpose.3"(#loc1716))
+#loc3807 = loc("multiply_1845"(#loc1717))
+#loc3808 = loc("index_1846.dc.transpose.0"(#loc1718))
+#loc3809 = loc("index_1846.dc.matmul.2"(#loc1719))
+#loc3810 = loc("index_1846.dc.transpose.3"(#loc1720))
+#loc3811 = loc("concatenate_1847"(#loc1721))
+#loc3812 = loc("multiply_1848"(#loc1722))
+#loc3813 = loc("add_1849"(#loc1723))
+#loc3814 = loc("reshape_1850.dc.squeeze.0"(#loc1724))
+#loc3815 = loc("transpose_1851"(#loc1725))
+#loc3816 = loc("matmul_1852"(#loc1726))
+#loc3817 = loc("reshape_1853.dc.unsqueeze.0"(#loc1727))
+#loc3818 = loc("multiply_1854"(#loc1728))
+#loc3819 = loc("add_1855"(#loc1729))
+#loc3820 = loc("softmax_1856"(#loc1730))
+#loc3821 = loc("reshape_1858.dc.squeeze.0"(#loc1731))
+#loc3822 = loc("matmul_1860"(#loc1732))
+#loc3823 = loc("reshape_1861"(#loc1733))
+#loc3824 = loc("transpose_1862"(#loc1734))
+#loc3825 = loc("transpose_1863"(#loc1735))
+#loc3826 = loc("reshape_1864.dc.squeeze.0"(#loc1736))
+#loc3827 = loc("transpose_1865"(#loc1737))
+#loc3828 = loc("matmul_1866"(#loc1738))
+#loc3829 = loc("reshape_1867.dc.unsqueeze.0"(#loc1739))
+#loc3830 = loc("transpose_1868"(#loc1740))
+#loc3831 = loc("reshape_1869"(#loc1741))
+#loc3832 = loc("matmul_1871"(#loc1742))
+#loc3833 = loc("reshape_1872.dc.unsqueeze.0"(#loc1743))
+#loc3834 = loc("add_1873"(#loc1744))
+#loc3835 = loc("multiply_1874"(#loc1745))
+#loc3836 = loc("reduce_avg_1875"(#loc1746))
+#loc3837 = loc("add_1876"(#loc1747))
+#loc3838 = loc("sqrt_1877"(#loc1748))
+#loc3839 = loc("reciprocal_1878"(#loc1749))
+#loc3840 = loc("multiply_1879"(#loc1750))
+#loc3841 = loc("multiply_1880"(#loc1751))
+#loc3842 = loc("reshape_1881.dc.squeeze.0"(#loc1752))
+#loc3843 = loc("matmul_1883"(#loc1753))
+#loc3844 = loc("reshape_1884.dc.unsqueeze.0"(#loc1754))
+#loc3845 = loc("sigmoid_1885"(#loc1755))
+#loc3846 = loc("multiply_1886"(#loc1756))
+#loc3847 = loc("matmul_1888"(#loc1757))
+#loc3848 = loc("reshape_1889.dc.unsqueeze.0"(#loc1758))
+#loc3849 = loc("multiply_1890"(#loc1759))
+#loc3850 = loc("matmul_1892"(#loc1760))
+#loc3851 = loc("add_1893"(#loc1761))
+#loc3852 = loc("multiply_1894"(#loc1762))
+#loc3853 = loc("reduce_avg_1895"(#loc1763))
+#loc3854 = loc("add_1896"(#loc1764))
+#loc3855 = loc("sqrt_1897"(#loc1765))
+#loc3856 = loc("reciprocal_1898"(#loc1766))
+#loc3857 = loc("multiply_1899"(#loc1767))
+#loc3858 = loc("multiply_1900"(#loc1768))
+#loc3859 = loc("reshape_1901.dc.squeeze.0"(#loc1769))
+#loc3860 = loc("matmul_1903"(#loc1770))
+#loc3861 = loc("reshape_1904"(#loc1771))
+#loc3862 = loc("transpose_1905"(#loc1772))
+#loc3863 = loc("concatenate_1912"(#loc1773))
+#loc3864 = loc("cosine_1913"(#loc1774))
+#loc3865 = loc("unsqueeze_1914"(#loc1775))
+#loc3866 = loc("multiply_1915"(#loc1776))
+#loc3867 = loc("index_1916.dc.transpose.0"(#loc1777))
+#loc3868 = loc("index_1916.dc.matmul.2"(#loc1778))
+#loc3869 = loc("index_1916.dc.transpose.3"(#loc1779))
+#loc3870 = loc("multiply_1917"(#loc1780))
+#loc3871 = loc("index_1918.dc.transpose.0"(#loc1781))
+#loc3872 = loc("index_1918.dc.matmul.2"(#loc1782))
+#loc3873 = loc("index_1918.dc.transpose.3"(#loc1783))
+#loc3874 = loc("concatenate_1919"(#loc1784))
+#loc3875 = loc("sine_1920"(#loc1785))
+#loc3876 = loc("unsqueeze_1921"(#loc1786))
+#loc3877 = loc("multiply_1922"(#loc1787))
+#loc3878 = loc("add_1923"(#loc1788))
+#loc3879 = loc("reshape_1924.dc.squeeze.0"(#loc1789))
+#loc3880 = loc("matmul_1926"(#loc1790))
+#loc3881 = loc("reshape_1927"(#loc1791))
+#loc3882 = loc("transpose_1928"(#loc1792))
+#loc3883 = loc("multiply_1929"(#loc1793))
+#loc3884 = loc("index_1930.dc.transpose.0"(#loc1794))
+#loc3885 = loc("index_1930.dc.matmul.2"(#loc1795))
+#loc3886 = loc("index_1930.dc.transpose.3"(#loc1796))
+#loc3887 = loc("multiply_1931"(#loc1797))
+#loc3888 = loc("index_1932.dc.transpose.0"(#loc1798))
+#loc3889 = loc("index_1932.dc.matmul.2"(#loc1799))
+#loc3890 = loc("index_1932.dc.transpose.3"(#loc1800))
+#loc3891 = loc("concatenate_1933"(#loc1801))
+#loc3892 = loc("multiply_1934"(#loc1802))
+#loc3893 = loc("add_1935"(#loc1803))
+#loc3894 = loc("reshape_1936.dc.squeeze.0"(#loc1804))
+#loc3895 = loc("transpose_1937"(#loc1805))
+#loc3896 = loc("matmul_1938"(#loc1806))
+#loc3897 = loc("reshape_1939.dc.unsqueeze.0"(#loc1807))
+#loc3898 = loc("multiply_1940"(#loc1808))
+#loc3899 = loc("add_1941"(#loc1809))
+#loc3900 = loc("softmax_1942"(#loc1810))
+#loc3901 = loc("reshape_1944.dc.squeeze.0"(#loc1811))
+#loc3902 = loc("matmul_1946"(#loc1812))
+#loc3903 = loc("reshape_1947"(#loc1813))
+#loc3904 = loc("transpose_1948"(#loc1814))
+#loc3905 = loc("transpose_1949"(#loc1815))
+#loc3906 = loc("reshape_1950.dc.squeeze.0"(#loc1816))
+#loc3907 = loc("transpose_1951"(#loc1817))
+#loc3908 = loc("matmul_1952"(#loc1818))
+#loc3909 = loc("reshape_1953.dc.unsqueeze.0"(#loc1819))
+#loc3910 = loc("transpose_1954"(#loc1820))
+#loc3911 = loc("reshape_1955"(#loc1821))
+#loc3912 = loc("matmul_1957"(#loc1822))
+#loc3913 = loc("reshape_1958.dc.unsqueeze.0"(#loc1823))
+#loc3914 = loc("add_1959"(#loc1824))
+#loc3915 = loc("multiply_1960"(#loc1825))
+#loc3916 = loc("reduce_avg_1961"(#loc1826))
+#loc3917 = loc("add_1962"(#loc1827))
+#loc3918 = loc("sqrt_1963"(#loc1828))
+#loc3919 = loc("reciprocal_1964"(#loc1829))
+#loc3920 = loc("multiply_1965"(#loc1830))
+#loc3921 = loc("multiply_1966"(#loc1831))
+#loc3922 = loc("reshape_1967.dc.squeeze.0"(#loc1832))
+#loc3923 = loc("matmul_1969"(#loc1833))
+#loc3924 = loc("reshape_1970.dc.unsqueeze.0"(#loc1834))
+#loc3925 = loc("sigmoid_1971"(#loc1835))
+#loc3926 = loc("multiply_1972"(#loc1836))
+#loc3927 = loc("matmul_1974"(#loc1837))
+#loc3928 = loc("reshape_1975.dc.unsqueeze.0"(#loc1838))
+#loc3929 = loc("multiply_1976"(#loc1839))
+#loc3930 = loc("matmul_1978"(#loc1840))
+#loc3931 = loc("add_1979"(#loc1841))
+#loc3932 = loc("multiply_1980"(#loc1842))
+#loc3933 = loc("reduce_avg_1981"(#loc1843))
+#loc3934 = loc("add_1982"(#loc1844))
+#loc3935 = loc("sqrt_1983"(#loc1845))
+#loc3936 = loc("reciprocal_1984"(#loc1846))
+#loc3937 = loc("multiply_1985"(#loc1847))
+#loc3938 = loc("multiply_1986"(#loc1848))
+#loc3939 = loc("reshape_1987.dc.squeeze.0"(#loc1849))
+#loc3940 = loc("matmul_1989"(#loc1850))
+#loc3941 = loc("reshape_1990"(#loc1851))
+#loc3942 = loc("transpose_1991"(#loc1852))
+#loc3943 = loc("concatenate_1998"(#loc1853))
+#loc3944 = loc("cosine_1999"(#loc1854))
+#loc3945 = loc("unsqueeze_2000"(#loc1855))
+#loc3946 = loc("multiply_2001"(#loc1856))
+#loc3947 = loc("index_2002.dc.transpose.0"(#loc1857))
+#loc3948 = loc("index_2002.dc.matmul.2"(#loc1858))
+#loc3949 = loc("index_2002.dc.transpose.3"(#loc1859))
+#loc3950 = loc("multiply_2003"(#loc1860))
+#loc3951 = loc("index_2004.dc.transpose.0"(#loc1861))
+#loc3952 = loc("index_2004.dc.matmul.2"(#loc1862))
+#loc3953 = loc("index_2004.dc.transpose.3"(#loc1863))
+#loc3954 = loc("concatenate_2005"(#loc1864))
+#loc3955 = loc("sine_2006"(#loc1865))
+#loc3956 = loc("unsqueeze_2007"(#loc1866))
+#loc3957 = loc("multiply_2008"(#loc1867))
+#loc3958 = loc("add_2009"(#loc1868))
+#loc3959 = loc("reshape_2010.dc.squeeze.0"(#loc1869))
+#loc3960 = loc("matmul_2012"(#loc1870))
+#loc3961 = loc("reshape_2013"(#loc1871))
+#loc3962 = loc("transpose_2014"(#loc1872))
+#loc3963 = loc("multiply_2015"(#loc1873))
+#loc3964 = loc("index_2016.dc.transpose.0"(#loc1874))
+#loc3965 = loc("index_2016.dc.matmul.2"(#loc1875))
+#loc3966 = loc("index_2016.dc.transpose.3"(#loc1876))
+#loc3967 = loc("multiply_2017"(#loc1877))
+#loc3968 = loc("index_2018.dc.transpose.0"(#loc1878))
+#loc3969 = loc("index_2018.dc.matmul.2"(#loc1879))
+#loc3970 = loc("index_2018.dc.transpose.3"(#loc1880))
+#loc3971 = loc("concatenate_2019"(#loc1881))
+#loc3972 = loc("multiply_2020"(#loc1882))
+#loc3973 = loc("add_2021"(#loc1883))
+#loc3974 = loc("reshape_2022.dc.squeeze.0"(#loc1884))
+#loc3975 = loc("transpose_2023"(#loc1885))
+#loc3976 = loc("matmul_2024"(#loc1886))
+#loc3977 = loc("reshape_2025.dc.unsqueeze.0"(#loc1887))
+#loc3978 = loc("multiply_2026"(#loc1888))
+#loc3979 = loc("add_2027"(#loc1889))
+#loc3980 = loc("softmax_2028"(#loc1890))
+#loc3981 = loc("reshape_2030.dc.squeeze.0"(#loc1891))
+#loc3982 = loc("matmul_2032"(#loc1892))
+#loc3983 = loc("reshape_2033"(#loc1893))
+#loc3984 = loc("transpose_2034"(#loc1894))
+#loc3985 = loc("transpose_2035"(#loc1895))
+#loc3986 = loc("reshape_2036.dc.squeeze.0"(#loc1896))
+#loc3987 = loc("transpose_2037"(#loc1897))
+#loc3988 = loc("matmul_2038"(#loc1898))
+#loc3989 = loc("reshape_2039.dc.unsqueeze.0"(#loc1899))
+#loc3990 = loc("transpose_2040"(#loc1900))
+#loc3991 = loc("reshape_2041"(#loc1901))
+#loc3992 = loc("matmul_2043"(#loc1902))
+#loc3993 = loc("reshape_2044.dc.unsqueeze.0"(#loc1903))
+#loc3994 = loc("add_2045"(#loc1904))
+#loc3995 = loc("multiply_2046"(#loc1905))
+#loc3996 = loc("reduce_avg_2047"(#loc1906))
+#loc3997 = loc("add_2048"(#loc1907))
+#loc3998 = loc("sqrt_2049"(#loc1908))
+#loc3999 = loc("reciprocal_2050"(#loc1909))
+#loc4000 = loc("multiply_2051"(#loc1910))
+#loc4001 = loc("multiply_2052"(#loc1911))
+#loc4002 = loc("reshape_2053.dc.squeeze.0"(#loc1912))
+#loc4003 = loc("matmul_2055"(#loc1913))
+#loc4004 = loc("reshape_2056.dc.unsqueeze.0"(#loc1914))
+#loc4005 = loc("sigmoid_2057"(#loc1915))
+#loc4006 = loc("multiply_2058"(#loc1916))
+#loc4007 = loc("matmul_2060"(#loc1917))
+#loc4008 = loc("reshape_2061.dc.unsqueeze.0"(#loc1918))
+#loc4009 = loc("multiply_2062"(#loc1919))
+#loc4010 = loc("matmul_2064"(#loc1920))
+#loc4011 = loc("add_2065"(#loc1921))
+#loc4012 = loc("multiply_2066"(#loc1922))
+#loc4013 = loc("reduce_avg_2067"(#loc1923))
+#loc4014 = loc("add_2068"(#loc1924))
+#loc4015 = loc("sqrt_2069"(#loc1925))
+#loc4016 = loc("reciprocal_2070"(#loc1926))
+#loc4017 = loc("multiply_2071"(#loc1927))
+#loc4018 = loc("multiply_2072"(#loc1928))
+#loc4019 = loc("reshape_2073.dc.squeeze.0"(#loc1929))
+#loc4020 = loc("matmul_2075"(#loc1930))
+#loc4021 = loc("reshape_2076"(#loc1931))
+#loc4022 = loc("transpose_2077"(#loc1932))
+#loc4023 = loc("concatenate_2084"(#loc1933))
+#loc4024 = loc("cosine_2085"(#loc1934))
+#loc4025 = loc("unsqueeze_2086"(#loc1935))
+#loc4026 = loc("multiply_2087"(#loc1936))
+#loc4027 = loc("index_2088.dc.transpose.0"(#loc1937))
+#loc4028 = loc("index_2088.dc.matmul.2"(#loc1938))
+#loc4029 = loc("index_2088.dc.transpose.3"(#loc1939))
+#loc4030 = loc("multiply_2089"(#loc1940))
+#loc4031 = loc("index_2090.dc.transpose.0"(#loc1941))
+#loc4032 = loc("index_2090.dc.matmul.2"(#loc1942))
+#loc4033 = loc("index_2090.dc.transpose.3"(#loc1943))
+#loc4034 = loc("concatenate_2091"(#loc1944))
+#loc4035 = loc("sine_2092"(#loc1945))
+#loc4036 = loc("unsqueeze_2093"(#loc1946))
+#loc4037 = loc("multiply_2094"(#loc1947))
+#loc4038 = loc("add_2095"(#loc1948))
+#loc4039 = loc("reshape_2096.dc.squeeze.0"(#loc1949))
+#loc4040 = loc("matmul_2098"(#loc1950))
+#loc4041 = loc("reshape_2099"(#loc1951))
+#loc4042 = loc("transpose_2100"(#loc1952))
+#loc4043 = loc("multiply_2101"(#loc1953))
+#loc4044 = loc("index_2102.dc.transpose.0"(#loc1954))
+#loc4045 = loc("index_2102.dc.matmul.2"(#loc1955))
+#loc4046 = loc("index_2102.dc.transpose.3"(#loc1956))
+#loc4047 = loc("multiply_2103"(#loc1957))
+#loc4048 = loc("index_2104.dc.transpose.0"(#loc1958))
+#loc4049 = loc("index_2104.dc.matmul.2"(#loc1959))
+#loc4050 = loc("index_2104.dc.transpose.3"(#loc1960))
+#loc4051 = loc("concatenate_2105"(#loc1961))
+#loc4052 = loc("multiply_2106"(#loc1962))
+#loc4053 = loc("add_2107"(#loc1963))
+#loc4054 = loc("reshape_2108.dc.squeeze.0"(#loc1964))
+#loc4055 = loc("transpose_2109"(#loc1965))
+#loc4056 = loc("matmul_2110"(#loc1966))
+#loc4057 = loc("reshape_2111.dc.unsqueeze.0"(#loc1967))
+#loc4058 = loc("multiply_2112"(#loc1968))
+#loc4059 = loc("add_2113"(#loc1969))
+#loc4060 = loc("softmax_2114"(#loc1970))
+#loc4061 = loc("reshape_2116.dc.squeeze.0"(#loc1971))
+#loc4062 = loc("matmul_2118"(#loc1972))
+#loc4063 = loc("reshape_2119"(#loc1973))
+#loc4064 = loc("transpose_2120"(#loc1974))
+#loc4065 = loc("transpose_2121"(#loc1975))
+#loc4066 = loc("reshape_2122.dc.squeeze.0"(#loc1976))
+#loc4067 = loc("transpose_2123"(#loc1977))
+#loc4068 = loc("matmul_2124"(#loc1978))
+#loc4069 = loc("reshape_2125.dc.unsqueeze.0"(#loc1979))
+#loc4070 = loc("transpose_2126"(#loc1980))
+#loc4071 = loc("reshape_2127"(#loc1981))
+#loc4072 = loc("matmul_2129"(#loc1982))
+#loc4073 = loc("reshape_2130.dc.unsqueeze.0"(#loc1983))
+#loc4074 = loc("add_2131"(#loc1984))
+#loc4075 = loc("multiply_2132"(#loc1985))
+#loc4076 = loc("reduce_avg_2133"(#loc1986))
+#loc4077 = loc("add_2134"(#loc1987))
+#loc4078 = loc("sqrt_2135"(#loc1988))
+#loc4079 = loc("reciprocal_2136"(#loc1989))
+#loc4080 = loc("multiply_2137"(#loc1990))
+#loc4081 = loc("multiply_2138"(#loc1991))
+#loc4082 = loc("reshape_2139.dc.squeeze.0"(#loc1992))
+#loc4083 = loc("matmul_2141"(#loc1993))
+#loc4084 = loc("reshape_2142.dc.unsqueeze.0"(#loc1994))
+#loc4085 = loc("sigmoid_2143"(#loc1995))
+#loc4086 = loc("multiply_2144"(#loc1996))
+#loc4087 = loc("matmul_2146"(#loc1997))
+#loc4088 = loc("reshape_2147.dc.unsqueeze.0"(#loc1998))
+#loc4089 = loc("multiply_2148"(#loc1999))
+#loc4090 = loc("matmul_2150"(#loc2000))
+#loc4091 = loc("add_2151"(#loc2001))
+#loc4092 = loc("multiply_2152"(#loc2002))
+#loc4093 = loc("reduce_avg_2153"(#loc2003))
+#loc4094 = loc("add_2154"(#loc2004))
+#loc4095 = loc("sqrt_2155"(#loc2005))
+#loc4096 = loc("reciprocal_2156"(#loc2006))
+#loc4097 = loc("multiply_2157"(#loc2007))
+#loc4098 = loc("multiply_2158"(#loc2008))
+#loc4099 = loc("reshape_2159.dc.squeeze.0"(#loc2009))
+#loc4100 = loc("matmul_2161"(#loc2010))
+#loc4101 = loc("reshape_2162"(#loc2011))
+#loc4102 = loc("transpose_2163"(#loc2012))
+#loc4103 = loc("concatenate_2170"(#loc2013))
+#loc4104 = loc("cosine_2171"(#loc2014))
+#loc4105 = loc("unsqueeze_2172"(#loc2015))
+#loc4106 = loc("multiply_2173"(#loc2016))
+#loc4107 = loc("index_2174.dc.transpose.0"(#loc2017))
+#loc4108 = loc("index_2174.dc.matmul.2"(#loc2018))
+#loc4109 = loc("index_2174.dc.transpose.3"(#loc2019))
+#loc4110 = loc("multiply_2175"(#loc2020))
+#loc4111 = loc("index_2176.dc.transpose.0"(#loc2021))
+#loc4112 = loc("index_2176.dc.matmul.2"(#loc2022))
+#loc4113 = loc("index_2176.dc.transpose.3"(#loc2023))
+#loc4114 = loc("concatenate_2177"(#loc2024))
+#loc4115 = loc("sine_2178"(#loc2025))
+#loc4116 = loc("unsqueeze_2179"(#loc2026))
+#loc4117 = loc("multiply_2180"(#loc2027))
+#loc4118 = loc("add_2181"(#loc2028))
+#loc4119 = loc("reshape_2182.dc.squeeze.0"(#loc2029))
+#loc4120 = loc("matmul_2184"(#loc2030))
+#loc4121 = loc("reshape_2185"(#loc2031))
+#loc4122 = loc("transpose_2186"(#loc2032))
+#loc4123 = loc("multiply_2187"(#loc2033))
+#loc4124 = loc("index_2188.dc.transpose.0"(#loc2034))
+#loc4125 = loc("index_2188.dc.matmul.2"(#loc2035))
+#loc4126 = loc("index_2188.dc.transpose.3"(#loc2036))
+#loc4127 = loc("multiply_2189"(#loc2037))
+#loc4128 = loc("index_2190.dc.transpose.0"(#loc2038))
+#loc4129 = loc("index_2190.dc.matmul.2"(#loc2039))
+#loc4130 = loc("index_2190.dc.transpose.3"(#loc2040))
+#loc4131 = loc("concatenate_2191"(#loc2041))
+#loc4132 = loc("multiply_2192"(#loc2042))
+#loc4133 = loc("add_2193"(#loc2043))
+#loc4134 = loc("reshape_2194.dc.squeeze.0"(#loc2044))
+#loc4135 = loc("transpose_2195"(#loc2045))
+#loc4136 = loc("matmul_2196"(#loc2046))
+#loc4137 = loc("reshape_2197.dc.unsqueeze.0"(#loc2047))
+#loc4138 = loc("multiply_2198"(#loc2048))
+#loc4139 = loc("add_2199"(#loc2049))
+#loc4140 = loc("softmax_2200"(#loc2050))
+#loc4141 = loc("reshape_2202.dc.squeeze.0"(#loc2051))
+#loc4142 = loc("matmul_2204"(#loc2052))
+#loc4143 = loc("reshape_2205"(#loc2053))
+#loc4144 = loc("transpose_2206"(#loc2054))
+#loc4145 = loc("transpose_2207"(#loc2055))
+#loc4146 = loc("reshape_2208.dc.squeeze.0"(#loc2056))
+#loc4147 = loc("transpose_2209"(#loc2057))
+#loc4148 = loc("matmul_2210"(#loc2058))
+#loc4149 = loc("reshape_2211.dc.unsqueeze.0"(#loc2059))
+#loc4150 = loc("transpose_2212"(#loc2060))
+#loc4151 = loc("reshape_2213"(#loc2061))
+#loc4152 = loc("matmul_2215"(#loc2062))
+#loc4153 = loc("reshape_2216.dc.unsqueeze.0"(#loc2063))
+#loc4154 = loc("add_2217"(#loc2064))
+#loc4155 = loc("multiply_2218"(#loc2065))
+#loc4156 = loc("reduce_avg_2219"(#loc2066))
+#loc4157 = loc("add_2220"(#loc2067))
+#loc4158 = loc("sqrt_2221"(#loc2068))
+#loc4159 = loc("reciprocal_2222"(#loc2069))
+#loc4160 = loc("multiply_2223"(#loc2070))
+#loc4161 = loc("multiply_2224"(#loc2071))
+#loc4162 = loc("reshape_2225.dc.squeeze.0"(#loc2072))
+#loc4163 = loc("matmul_2227"(#loc2073))
+#loc4164 = loc("reshape_2228.dc.unsqueeze.0"(#loc2074))
+#loc4165 = loc("sigmoid_2229"(#loc2075))
+#loc4166 = loc("multiply_2230"(#loc2076))
+#loc4167 = loc("matmul_2232"(#loc2077))
+#loc4168 = loc("reshape_2233.dc.unsqueeze.0"(#loc2078))
+#loc4169 = loc("multiply_2234"(#loc2079))
+#loc4170 = loc("matmul_2236"(#loc2080))
+#loc4171 = loc("add_2237"(#loc2081))
+#loc4172 = loc("multiply_2238"(#loc2082))
+#loc4173 = loc("reduce_avg_2239"(#loc2083))
+#loc4174 = loc("add_2240"(#loc2084))
+#loc4175 = loc("sqrt_2241"(#loc2085))
+#loc4176 = loc("reciprocal_2242"(#loc2086))
+#loc4177 = loc("multiply_2243"(#loc2087))
+#loc4178 = loc("multiply_2244"(#loc2088))
+#loc4179 = loc("matmul_2246"(#loc2089))
diff --git a/tools/explorer/test/models/resnet_ttir.mlir b/tools/explorer/test/models/resnet_ttir.mlir
new file mode 100644
index 000000000..e94c3d83d
--- /dev/null
+++ b/tools/explorer/test/models/resnet_ttir.mlir
@@ -0,0 +1,1788 @@
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>
+#loc = loc("ResNet":0:0)
+module @ResNet attributes {} {
+  func.func @forward(%arg0: tensor<1x3x224x224xf32> {ttir.name = "input_1"} loc("ResNet":0:0), %arg1: tensor<64x1x1xf32> {ttir.name = "input_1_add_1"} loc("ResNet":0:0), %arg2: tensor<64x1x1xf32> {ttir.name = "input_1_add_1_fork_clone1229"} loc("ResNet":0:0), %arg3: tensor<64x1x1xf32> {ttir.name = "input_1_add_18"} loc("ResNet":0:0), %arg4: tensor<64x1x1xf32> {ttir.name = "input_1_add_18_fork_clone1271"} loc("ResNet":0:0), %arg5: tensor<64x1x1xf32> {ttir.name = "input_1_add_34"} loc("ResNet":0:0), %arg6: tensor<64x1x1xf32> {ttir.name = "input_1_add_34_fork_clone1204"} loc("ResNet":0:0), %arg7: tensor<256x1x1xf32> {ttir.name = "input_1_add_50"} loc("ResNet":0:0), %arg8: tensor<256x1x1xf32> {ttir.name = "input_1_add_50_fork_clone1108"} loc("ResNet":0:0), %arg9: tensor<256x1x1xf32> {ttir.name = "input_1_add_65"} loc("ResNet":0:0), %arg10: tensor<256x1x1xf32> {ttir.name = "input_1_add_65_fork_clone1112"} loc("ResNet":0:0), %arg11: tensor<64x1x1xf32> {ttir.name = "input_1_add_82"} loc("ResNet":0:0), %arg12: tensor<64x1x1xf32> {ttir.name = "input_1_add_82_fork_clone1238"} loc("ResNet":0:0), %arg13: tensor<64x1x1xf32> {ttir.name = "input_1_add_98"} loc("ResNet":0:0), %arg14: tensor<64x1x1xf32> {ttir.name = "input_1_add_98_fork_clone1152"} loc("ResNet":0:0), %arg15: tensor<256x1x1xf32> {ttir.name = "input_1_add_114"} loc("ResNet":0:0), %arg16: tensor<256x1x1xf32> {ttir.name = "input_1_add_114_fork_clone1051"} loc("ResNet":0:0), %arg17: tensor<64x1x1xf32> {ttir.name = "input_1_add_131"} loc("ResNet":0:0), %arg18: tensor<64x1x1xf32> {ttir.name = "input_1_add_131_fork_clone1192"} loc("ResNet":0:0), %arg19: tensor<64x1x1xf32> {ttir.name = "input_1_add_147"} loc("ResNet":0:0), %arg20: tensor<64x1x1xf32> {ttir.name = "input_1_add_147_fork_clone1096"} loc("ResNet":0:0), %arg21: tensor<256x1x1xf32> {ttir.name = "input_1_add_163"} loc("ResNet":0:0), %arg22: tensor<256x1x1xf32> {ttir.name = "input_1_add_163_fork_clone992"} loc("ResNet":0:0), %arg23: tensor<128x1x1xf32> {ttir.name = 
"input_1_add_180"} loc("ResNet":0:0), %arg24: tensor<128x1x1xf32> {ttir.name = "input_1_add_180_fork_clone1065"} loc("ResNet":0:0), %arg25: tensor<128x1x1xf32> {ttir.name = "input_1_add_196"} loc("ResNet":0:0), %arg26: tensor<128x1x1xf32> {ttir.name = "input_1_add_196_fork_clone962"} loc("ResNet":0:0), %arg27: tensor<512x1x1xf32> {ttir.name = "input_1_add_212"} loc("ResNet":0:0), %arg28: tensor<512x1x1xf32> {ttir.name = "input_1_add_212_fork_clone853"} loc("ResNet":0:0), %arg29: tensor<512x1x1xf32> {ttir.name = "input_1_add_227"} loc("ResNet":0:0), %arg30: tensor<512x1x1xf32> {ttir.name = "input_1_add_227_fork_clone857"} loc("ResNet":0:0), %arg31: tensor<128x1x1xf32> {ttir.name = "input_1_add_244"} loc("ResNet":0:0), %arg32: tensor<128x1x1xf32> {ttir.name = "input_1_add_244_fork_clone1007"} loc("ResNet":0:0), %arg33: tensor<128x1x1xf32> {ttir.name = "input_1_add_260"} loc("ResNet":0:0), %arg34: tensor<128x1x1xf32> {ttir.name = "input_1_add_260_fork_clone901"} loc("ResNet":0:0), %arg35: tensor<512x1x1xf32> {ttir.name = "input_1_add_276"} loc("ResNet":0:0), %arg36: tensor<512x1x1xf32> {ttir.name = "input_1_add_276_fork_clone791"} loc("ResNet":0:0), %arg37: tensor<128x1x1xf32> {ttir.name = "input_1_add_293"} loc("ResNet":0:0), %arg38: tensor<128x1x1xf32> {ttir.name = "input_1_add_293_fork_clone950"} loc("ResNet":0:0), %arg39: tensor<128x1x1xf32> {ttir.name = "input_1_add_309"} loc("ResNet":0:0), %arg40: tensor<128x1x1xf32> {ttir.name = "input_1_add_309_fork_clone841"} loc("ResNet":0:0), %arg41: tensor<512x1x1xf32> {ttir.name = "input_1_add_325"} loc("ResNet":0:0), %arg42: tensor<512x1x1xf32> {ttir.name = "input_1_add_325_fork_clone735"} loc("ResNet":0:0), %arg43: tensor<128x1x1xf32> {ttir.name = "input_1_add_342"} loc("ResNet":0:0), %arg44: tensor<128x1x1xf32> {ttir.name = "input_1_add_342_fork_clone889"} loc("ResNet":0:0), %arg45: tensor<128x1x1xf32> {ttir.name = "input_1_add_358"} loc("ResNet":0:0), %arg46: tensor<128x1x1xf32> {ttir.name = 
"input_1_add_358_fork_clone779"} loc("ResNet":0:0), %arg47: tensor<512x1x1xf32> {ttir.name = "input_1_add_374"} loc("ResNet":0:0), %arg48: tensor<512x1x1xf32> {ttir.name = "input_1_add_374_fork_clone677"} loc("ResNet":0:0), %arg49: tensor<256x1x1xf32> {ttir.name = "input_1_add_391"} loc("ResNet":0:0), %arg50: tensor<256x1x1xf32> {ttir.name = "input_1_add_391_fork_clone748"} loc("ResNet":0:0), %arg51: tensor<256x1x1xf32> {ttir.name = "input_1_add_407"} loc("ResNet":0:0), %arg52: tensor<256x1x1xf32> {ttir.name = "input_1_add_407_fork_clone645"} loc("ResNet":0:0), %arg53: tensor<1024x1x1xf32> {ttir.name = "input_1_add_423"} loc("ResNet":0:0), %arg54: tensor<1024x1x1xf32> {ttir.name = "input_1_add_423_fork_clone524"} loc("ResNet":0:0), %arg55: tensor<1024x1x1xf32> {ttir.name = "input_1_add_438"} loc("ResNet":0:0), %arg56: tensor<1024x1x1xf32> {ttir.name = "input_1_add_438_fork_clone528"} loc("ResNet":0:0), %arg57: tensor<256x1x1xf32> {ttir.name = "input_1_add_455"} loc("ResNet":0:0), %arg58: tensor<256x1x1xf32> {ttir.name = "input_1_add_455_fork_clone692"} loc("ResNet":0:0), %arg59: tensor<256x1x1xf32> {ttir.name = "input_1_add_471"} loc("ResNet":0:0), %arg60: tensor<256x1x1xf32> {ttir.name = "input_1_add_471_fork_clone580"} loc("ResNet":0:0), %arg61: tensor<1024x1x1xf32> {ttir.name = "input_1_add_487"} loc("ResNet":0:0), %arg62: tensor<1024x1x1xf32> {ttir.name = "input_1_add_487_fork_clone453"} loc("ResNet":0:0), %arg63: tensor<256x1x1xf32> {ttir.name = "input_1_add_504"} loc("ResNet":0:0), %arg64: tensor<256x1x1xf32> {ttir.name = "input_1_add_504_fork_clone633"} loc("ResNet":0:0), %arg65: tensor<256x1x1xf32> {ttir.name = "input_1_add_520"} loc("ResNet":0:0), %arg66: tensor<256x1x1xf32> {ttir.name = "input_1_add_520_fork_clone512"} loc("ResNet":0:0), %arg67: tensor<1024x1x1xf32> {ttir.name = "input_1_add_536"} loc("ResNet":0:0), %arg68: tensor<1024x1x1xf32> {ttir.name = "input_1_add_536_fork_clone389"} loc("ResNet":0:0), %arg69: tensor<256x1x1xf32> {ttir.name = 
"input_1_add_553"} loc("ResNet":0:0), %arg70: tensor<256x1x1xf32> {ttir.name = "input_1_add_553_fork_clone568"} loc("ResNet":0:0), %arg71: tensor<256x1x1xf32> {ttir.name = "input_1_add_569"} loc("ResNet":0:0), %arg72: tensor<256x1x1xf32> {ttir.name = "input_1_add_569_fork_clone441"} loc("ResNet":0:0), %arg73: tensor<1024x1x1xf32> {ttir.name = "input_1_add_585"} loc("ResNet":0:0), %arg74: tensor<1024x1x1xf32> {ttir.name = "input_1_add_585_fork_clone329"} loc("ResNet":0:0), %arg75: tensor<256x1x1xf32> {ttir.name = "input_1_add_602"} loc("ResNet":0:0), %arg76: tensor<256x1x1xf32> {ttir.name = "input_1_add_602_fork_clone500"} loc("ResNet":0:0), %arg77: tensor<256x1x1xf32> {ttir.name = "input_1_add_618"} loc("ResNet":0:0), %arg78: tensor<256x1x1xf32> {ttir.name = "input_1_add_618_fork_clone377"} loc("ResNet":0:0), %arg79: tensor<1024x1x1xf32> {ttir.name = "input_1_add_634"} loc("ResNet":0:0), %arg80: tensor<1024x1x1xf32> {ttir.name = "input_1_add_634_fork_clone274"} loc("ResNet":0:0), %arg81: tensor<256x1x1xf32> {ttir.name = "input_1_add_651"} loc("ResNet":0:0), %arg82: tensor<256x1x1xf32> {ttir.name = "input_1_add_651_fork_clone429"} loc("ResNet":0:0), %arg83: tensor<256x1x1xf32> {ttir.name = "input_1_add_667"} loc("ResNet":0:0), %arg84: tensor<256x1x1xf32> {ttir.name = "input_1_add_667_fork_clone317"} loc("ResNet":0:0), %arg85: tensor<1024x1x1xf32> {ttir.name = "input_1_add_683"} loc("ResNet":0:0), %arg86: tensor<1024x1x1xf32> {ttir.name = "input_1_add_683_fork_clone219"} loc("ResNet":0:0), %arg87: tensor<512x1x1xf32> {ttir.name = "input_1_add_700"} loc("ResNet":0:0), %arg88: tensor<512x1x1xf32> {ttir.name = "input_1_add_700_fork_clone287"} loc("ResNet":0:0), %arg89: tensor<512x1x1xf32> {ttir.name = "input_1_add_716"} loc("ResNet":0:0), %arg90: tensor<512x1x1xf32> {ttir.name = "input_1_add_716_fork_clone190"} loc("ResNet":0:0), %arg91: tensor<2048x1x1xf32> {ttir.name = "input_1_add_732"} loc("ResNet":0:0), %arg92: tensor<2048x1x1xf32> {ttir.name = 
"input_1_add_732_fork_clone101"} loc("ResNet":0:0), %arg93: tensor<2048x1x1xf32> {ttir.name = "input_1_add_747"} loc("ResNet":0:0), %arg94: tensor<2048x1x1xf32> {ttir.name = "input_1_add_747_fork_clone105"} loc("ResNet":0:0), %arg95: tensor<512x1x1xf32> {ttir.name = "input_1_add_764"} loc("ResNet":0:0), %arg96: tensor<512x1x1xf32> {ttir.name = "input_1_add_764_fork_clone233"} loc("ResNet":0:0), %arg97: tensor<512x1x1xf32> {ttir.name = "input_1_add_780"} loc("ResNet":0:0), %arg98: tensor<512x1x1xf32> {ttir.name = "input_1_add_780_fork_clone138"} loc("ResNet":0:0), %arg99: tensor<2048x1x1xf32> {ttir.name = "input_1_add_796"} loc("ResNet":0:0), %arg100: tensor<2048x1x1xf32> {ttir.name = "input_1_add_796_fork_clone61"} loc("ResNet":0:0), %arg101: tensor<512x1x1xf32> {ttir.name = "input_1_add_813"} loc("ResNet":0:0), %arg102: tensor<512x1x1xf32> {ttir.name = "input_1_add_813_fork_clone178"} loc("ResNet":0:0), %arg103: tensor<512x1x1xf32> {ttir.name = "input_1_add_829"} loc("ResNet":0:0), %arg104: tensor<512x1x1xf32> {ttir.name = "input_1_add_829_fork_clone89"} loc("ResNet":0:0), %arg105: tensor<2048x1x1xf32> {ttir.name = "input_1_add_845"} loc("ResNet":0:0), %arg106: tensor<2048x1x1xf32> {ttir.name = "input_1_add_845_fork_clone32"} loc("ResNet":0:0), %arg107: tensor<64x3x7x7xf32> {ttir.name = "conv1.weight"} loc("ResNet":0:0), %arg108: tensor<64x64x1x1xf32> {ttir.name = "layer1.0.conv1.weight"} loc("ResNet":0:0), %arg109: tensor<64x64x3x3xf32> {ttir.name = "layer1.0.conv2.weight"} loc("ResNet":0:0), %arg110: tensor<256x64x1x1xf32> {ttir.name = "layer1.0.conv3.weight"} loc("ResNet":0:0), %arg111: tensor<256x64x1x1xf32> {ttir.name = "layer1.0.downsample.0.weight"} loc("ResNet":0:0), %arg112: tensor<64x256x1x1xf32> {ttir.name = "layer1.1.conv1.weight"} loc("ResNet":0:0), %arg113: tensor<64x64x3x3xf32> {ttir.name = "layer1.1.conv2.weight"} loc("ResNet":0:0), %arg114: tensor<256x64x1x1xf32> {ttir.name = "layer1.1.conv3.weight"} loc("ResNet":0:0), %arg115: 
tensor<64x256x1x1xf32> {ttir.name = "layer1.2.conv1.weight"} loc("ResNet":0:0), %arg116: tensor<64x64x3x3xf32> {ttir.name = "layer1.2.conv2.weight"} loc("ResNet":0:0), %arg117: tensor<256x64x1x1xf32> {ttir.name = "layer1.2.conv3.weight"} loc("ResNet":0:0), %arg118: tensor<128x256x1x1xf32> {ttir.name = "layer2.0.conv1.weight"} loc("ResNet":0:0), %arg119: tensor<128x128x3x3xf32> {ttir.name = "layer2.0.conv2.weight"} loc("ResNet":0:0), %arg120: tensor<512x128x1x1xf32> {ttir.name = "layer2.0.conv3.weight"} loc("ResNet":0:0), %arg121: tensor<512x256x1x1xf32> {ttir.name = "layer2.0.downsample.0.weight"} loc("ResNet":0:0), %arg122: tensor<128x512x1x1xf32> {ttir.name = "layer2.1.conv1.weight"} loc("ResNet":0:0), %arg123: tensor<128x128x3x3xf32> {ttir.name = "layer2.1.conv2.weight"} loc("ResNet":0:0), %arg124: tensor<512x128x1x1xf32> {ttir.name = "layer2.1.conv3.weight"} loc("ResNet":0:0), %arg125: tensor<128x512x1x1xf32> {ttir.name = "layer2.2.conv1.weight"} loc("ResNet":0:0), %arg126: tensor<128x128x3x3xf32> {ttir.name = "layer2.2.conv2.weight"} loc("ResNet":0:0), %arg127: tensor<512x128x1x1xf32> {ttir.name = "layer2.2.conv3.weight"} loc("ResNet":0:0), %arg128: tensor<128x512x1x1xf32> {ttir.name = "layer2.3.conv1.weight"} loc("ResNet":0:0), %arg129: tensor<128x128x3x3xf32> {ttir.name = "layer2.3.conv2.weight"} loc("ResNet":0:0), %arg130: tensor<512x128x1x1xf32> {ttir.name = "layer2.3.conv3.weight"} loc("ResNet":0:0), %arg131: tensor<256x512x1x1xf32> {ttir.name = "layer3.0.conv1.weight"} loc("ResNet":0:0), %arg132: tensor<256x256x3x3xf32> {ttir.name = "layer3.0.conv2.weight"} loc("ResNet":0:0), %arg133: tensor<1024x256x1x1xf32> {ttir.name = "layer3.0.conv3.weight"} loc("ResNet":0:0), %arg134: tensor<1024x512x1x1xf32> {ttir.name = "layer3.0.downsample.0.weight"} loc("ResNet":0:0), %arg135: tensor<256x1024x1x1xf32> {ttir.name = "layer3.1.conv1.weight"} loc("ResNet":0:0), %arg136: tensor<256x256x3x3xf32> {ttir.name = "layer3.1.conv2.weight"} loc("ResNet":0:0), %arg137: 
tensor<1024x256x1x1xf32> {ttir.name = "layer3.1.conv3.weight"} loc("ResNet":0:0), %arg138: tensor<256x1024x1x1xf32> {ttir.name = "layer3.2.conv1.weight"} loc("ResNet":0:0), %arg139: tensor<256x256x3x3xf32> {ttir.name = "layer3.2.conv2.weight"} loc("ResNet":0:0), %arg140: tensor<1024x256x1x1xf32> {ttir.name = "layer3.2.conv3.weight"} loc("ResNet":0:0), %arg141: tensor<256x1024x1x1xf32> {ttir.name = "layer3.3.conv1.weight"} loc("ResNet":0:0), %arg142: tensor<256x256x3x3xf32> {ttir.name = "layer3.3.conv2.weight"} loc("ResNet":0:0), %arg143: tensor<1024x256x1x1xf32> {ttir.name = "layer3.3.conv3.weight"} loc("ResNet":0:0), %arg144: tensor<256x1024x1x1xf32> {ttir.name = "layer3.4.conv1.weight"} loc("ResNet":0:0), %arg145: tensor<256x256x3x3xf32> {ttir.name = "layer3.4.conv2.weight"} loc("ResNet":0:0), %arg146: tensor<1024x256x1x1xf32> {ttir.name = "layer3.4.conv3.weight"} loc("ResNet":0:0), %arg147: tensor<256x1024x1x1xf32> {ttir.name = "layer3.5.conv1.weight"} loc("ResNet":0:0), %arg148: tensor<256x256x3x3xf32> {ttir.name = "layer3.5.conv2.weight"} loc("ResNet":0:0), %arg149: tensor<1024x256x1x1xf32> {ttir.name = "layer3.5.conv3.weight"} loc("ResNet":0:0), %arg150: tensor<512x1024x1x1xf32> {ttir.name = "layer4.0.conv1.weight"} loc("ResNet":0:0), %arg151: tensor<512x512x3x3xf32> {ttir.name = "layer4.0.conv2.weight"} loc("ResNet":0:0), %arg152: tensor<2048x512x1x1xf32> {ttir.name = "layer4.0.conv3.weight"} loc("ResNet":0:0), %arg153: tensor<2048x1024x1x1xf32> {ttir.name = "layer4.0.downsample.0.weight"} loc("ResNet":0:0), %arg154: tensor<512x2048x1x1xf32> {ttir.name = "layer4.1.conv1.weight"} loc("ResNet":0:0), %arg155: tensor<512x512x3x3xf32> {ttir.name = "layer4.1.conv2.weight"} loc("ResNet":0:0), %arg156: tensor<2048x512x1x1xf32> {ttir.name = "layer4.1.conv3.weight"} loc("ResNet":0:0), %arg157: tensor<512x2048x1x1xf32> {ttir.name = "layer4.2.conv1.weight"} loc("ResNet":0:0), %arg158: tensor<512x512x3x3xf32> {ttir.name = "layer4.2.conv2.weight"} loc("ResNet":0:0), 
%arg159: tensor<2048x512x1x1xf32> {ttir.name = "layer4.2.conv3.weight"} loc("ResNet":0:0), %arg160: tensor<2048x1000xf32> {ttir.name = "fc.weight"} loc("ResNet":0:0), %arg161: tensor<1000xf32> {ttir.name = "fc.bias"} loc("ResNet":0:0)) -> (tensor<1x1000xf32> {ttir.name = "ResNet.output_add_867"}) {
+    %0 = tensor.empty() : tensor<1x224x3x224xf32> loc(#loc447)
+    %1 = "ttir.transpose"(%arg0, %0) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x3x224x224xf32>, tensor<1x224x3x224xf32>) -> tensor<1x224x3x224xf32> loc(#loc447)
+    %2 = tensor.empty() : tensor<1x224x224x3xf32> loc(#loc448)
+    %3 = "ttir.transpose"(%1, %2) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x224x3x224xf32>, tensor<1x224x224x3xf32>) -> tensor<1x224x224x3xf32> loc(#loc448)
+    %4 = tensor.empty() : tensor<1x112x112x64xf32> loc(#loc449)
+    %5 = "ttir.conv2d"(%3, %arg107, %4) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 3 : si32, padding_left = 3 : si32, padding_right = 3 : si32, padding_top = 3 : si32, stride_height = 2 : si32, stride_width = 2 : si32}> {channel_last = 1 : si32} : (tensor<1x224x224x3xf32>, tensor<64x3x7x7xf32>, tensor<1x112x112x64xf32>) -> tensor<1x112x112x64xf32> loc(#loc449)
+    %6 = tensor.empty() : tensor<1x112x64x112xf32> loc(#loc450)
+    %7 = "ttir.transpose"(%5, %6) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x112x112x64xf32>, tensor<1x112x64x112xf32>) -> tensor<1x112x64x112xf32> loc(#loc450)
+    %8 = tensor.empty() : tensor<1x64x112x112xf32> loc(#loc451)
+    %9 = "ttir.transpose"(%7, %8) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x112x64x112xf32>, tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> loc(#loc451)
+    %10 = tensor.empty() : tensor<1x64x112x112xf32> loc(#loc452)
+    %11 = "ttir.multiply"(%9, %arg1, %10) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x112x112xf32>, tensor<64x1x1xf32>, tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> loc(#loc452)
+    %12 = tensor.empty() : tensor<1x64x112x112xf32> loc(#loc453)
+    %13 = "ttir.add"(%11, %arg2, %12) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x112x112xf32>, tensor<64x1x1xf32>, tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> loc(#loc453)
+    %14 = tensor.empty() : tensor<1x64x112x112xf32> loc(#loc454)
+    %15 = "ttir.relu"(%13, %14) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x112x112xf32>, tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> loc(#loc454)
+    %16 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc455)
+    %17 = "ttir.max_pool2d"(%15, %16) <{ceil_mode = false, dilation_height = 1 : si32, dilation_width = 1 : si32, kernel_height = 3 : si32, kernel_width = 3 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 2 : si32, stride_width = 2 : si32}> {channel_last = 0 : si32} : (tensor<1x64x112x112xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc455)
+    %18 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc456)
+    %19 = "ttir.transpose"(%17, %18) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc456)
+    %20 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc457)
+    %21 = "ttir.transpose"(%19, %20) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc457)
+    %22 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc458)
+    %23 = "ttir.conv2d"(%21, %arg108, %22) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x64xf32>, tensor<64x64x1x1xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc458)
+    %24 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc459)
+    %25 = "ttir.transpose"(%23, %24) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x64xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc459)
+    %26 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc460)
+    %27 = "ttir.transpose"(%25, %26) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc460)
+    %28 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc461)
+    %29 = "ttir.multiply"(%27, %arg3, %28) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc461)
+    %30 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc462)
+    %31 = "ttir.add"(%29, %arg4, %30) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc462)
+    %32 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc463)
+    %33 = "ttir.relu"(%31, %32) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc463)
+    %34 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc464)
+    %35 = "ttir.transpose"(%33, %34) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc464)
+    %36 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc465)
+    %37 = "ttir.transpose"(%35, %36) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc465)
+    %38 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc466)
+    %39 = "ttir.conv2d"(%37, %arg109, %38) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x64xf32>, tensor<64x64x3x3xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc466)
+    %40 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc467)
+    %41 = "ttir.transpose"(%39, %40) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x64xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc467)
+    %42 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc468)
+    %43 = "ttir.transpose"(%41, %42) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc468)
+    %44 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc469)
+    %45 = "ttir.multiply"(%43, %arg5, %44) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc469)
+    %46 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc470)
+    %47 = "ttir.add"(%45, %arg6, %46) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc470)
+    %48 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc471)
+    %49 = "ttir.relu"(%47, %48) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc471)
+    %50 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc472)
+    %51 = "ttir.transpose"(%49, %50) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc472)
+    %52 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc473)
+    %53 = "ttir.transpose"(%51, %52) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc473)
+    %54 = tensor.empty() : tensor<1x56x56x256xf32> loc(#loc474)
+    %55 = "ttir.conv2d"(%53, %arg110, %54) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x64xf32>, tensor<256x64x1x1xf32>, tensor<1x56x56x256xf32>) -> tensor<1x56x56x256xf32> loc(#loc474)
+    %56 = tensor.empty() : tensor<1x56x256x56xf32> loc(#loc475)
+    %57 = "ttir.transpose"(%55, %56) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x256xf32>, tensor<1x56x256x56xf32>) -> tensor<1x56x256x56xf32> loc(#loc475)
+    %58 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc476)
+    %59 = "ttir.transpose"(%57, %58) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x256x56xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc476)
+    %60 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc477)
+    %61 = "ttir.multiply"(%59, %arg7, %60) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<256x1x1xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc477)
+    %62 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc478)
+    %63 = "ttir.add"(%61, %arg8, %62) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<256x1x1xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc478)
+    %64 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc479)
+    %65 = "ttir.transpose"(%17, %64) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc479)
+    %66 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc480)
+    %67 = "ttir.transpose"(%65, %66) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc480)
+    %68 = tensor.empty() : tensor<1x56x56x256xf32> loc(#loc481)
+    %69 = "ttir.conv2d"(%67, %arg111, %68) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x64xf32>, tensor<256x64x1x1xf32>, tensor<1x56x56x256xf32>) -> tensor<1x56x56x256xf32> loc(#loc481)
+    %70 = tensor.empty() : tensor<1x56x256x56xf32> loc(#loc482)
+    %71 = "ttir.transpose"(%69, %70) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x256xf32>, tensor<1x56x256x56xf32>) -> tensor<1x56x256x56xf32> loc(#loc482)
+    %72 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc483)
+    %73 = "ttir.transpose"(%71, %72) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x256x56xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc483)
+    %74 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc484)
+    %75 = "ttir.multiply"(%73, %arg9, %74) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<256x1x1xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc484)
+    %76 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc485)
+    %77 = "ttir.add"(%75, %arg10, %76) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<256x1x1xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc485)
+    %78 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc486)
+    %79 = "ttir.add"(%63, %77, %78) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc486)
+    %80 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc487)
+    %81 = "ttir.relu"(%79, %80) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc487)
+    %82 = tensor.empty() : tensor<1x56x256x56xf32> loc(#loc488)
+    %83 = "ttir.transpose"(%81, %82) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<1x56x256x56xf32>) -> tensor<1x56x256x56xf32> loc(#loc488)
+    %84 = tensor.empty() : tensor<1x56x56x256xf32> loc(#loc489)
+    %85 = "ttir.transpose"(%83, %84) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x256x56xf32>, tensor<1x56x56x256xf32>) -> tensor<1x56x56x256xf32> loc(#loc489)
+    %86 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc490)
+    %87 = "ttir.conv2d"(%85, %arg112, %86) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x256xf32>, tensor<64x256x1x1xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc490)
+    %88 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc491)
+    %89 = "ttir.transpose"(%87, %88) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x64xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc491)
+    %90 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc492)
+    %91 = "ttir.transpose"(%89, %90) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc492)
+    %92 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc493)
+    %93 = "ttir.multiply"(%91, %arg11, %92) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc493)
+    %94 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc494)
+    %95 = "ttir.add"(%93, %arg12, %94) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc494)
+    %96 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc495)
+    %97 = "ttir.relu"(%95, %96) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc495)
+    %98 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc496)
+    %99 = "ttir.transpose"(%97, %98) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc496)
+    %100 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc497)
+    %101 = "ttir.transpose"(%99, %100) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc497)
+    %102 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc498)
+    %103 = "ttir.conv2d"(%101, %arg113, %102) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x64xf32>, tensor<64x64x3x3xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc498)
+    %104 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc499)
+    %105 = "ttir.transpose"(%103, %104) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x64xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc499)
+    %106 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc500)
+    %107 = "ttir.transpose"(%105, %106) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc500)
+    %108 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc501)
+    %109 = "ttir.multiply"(%107, %arg13, %108) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc501)
+    %110 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc502)
+    %111 = "ttir.add"(%109, %arg14, %110) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc502)
+    %112 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc503)
+    %113 = "ttir.relu"(%111, %112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc503)
+    %114 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc504)
+    %115 = "ttir.transpose"(%113, %114) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc504)
+    %116 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc505)
+    %117 = "ttir.transpose"(%115, %116) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc505)
+    %118 = tensor.empty() : tensor<1x56x56x256xf32> loc(#loc506)
+    %119 = "ttir.conv2d"(%117, %arg114, %118) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x64xf32>, tensor<256x64x1x1xf32>, tensor<1x56x56x256xf32>) -> tensor<1x56x56x256xf32> loc(#loc506)
+    %120 = tensor.empty() : tensor<1x56x256x56xf32> loc(#loc507)
+    %121 = "ttir.transpose"(%119, %120) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x256xf32>, tensor<1x56x256x56xf32>) -> tensor<1x56x256x56xf32> loc(#loc507)
+    %122 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc508)
+    %123 = "ttir.transpose"(%121, %122) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x256x56xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc508)
+    %124 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc509)
+    %125 = "ttir.multiply"(%123, %arg15, %124) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<256x1x1xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc509)
+    %126 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc510)
+    %127 = "ttir.add"(%125, %arg16, %126) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<256x1x1xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc510)
+    %128 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc511)
+    %129 = "ttir.add"(%127, %81, %128) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc511)
+    %130 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc512)
+    %131 = "ttir.relu"(%129, %130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc512)
+    %132 = tensor.empty() : tensor<1x56x256x56xf32> loc(#loc513)
+    %133 = "ttir.transpose"(%131, %132) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<1x56x256x56xf32>) -> tensor<1x56x256x56xf32> loc(#loc513)
+    %134 = tensor.empty() : tensor<1x56x56x256xf32> loc(#loc514)
+    %135 = "ttir.transpose"(%133, %134) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x256x56xf32>, tensor<1x56x56x256xf32>) -> tensor<1x56x56x256xf32> loc(#loc514)
+    %136 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc515)
+    %137 = "ttir.conv2d"(%135, %arg115, %136) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x256xf32>, tensor<64x256x1x1xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc515)
+    %138 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc516)
+    %139 = "ttir.transpose"(%137, %138) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x64xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc516)
+    %140 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc517)
+    %141 = "ttir.transpose"(%139, %140) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc517)
+    %142 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc518)
+    %143 = "ttir.multiply"(%141, %arg17, %142) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc518)
+    %144 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc519)
+    %145 = "ttir.add"(%143, %arg18, %144) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc519)
+    %146 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc520)
+    %147 = "ttir.relu"(%145, %146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc520)
+    %148 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc521)
+    %149 = "ttir.transpose"(%147, %148) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc521)
+    %150 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc522)
+    %151 = "ttir.transpose"(%149, %150) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc522)
+    %152 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc523)
+    %153 = "ttir.conv2d"(%151, %arg116, %152) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x64xf32>, tensor<64x64x3x3xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc523)
+    %154 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc524)
+    %155 = "ttir.transpose"(%153, %154) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x64xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc524)
+    %156 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc525)
+    %157 = "ttir.transpose"(%155, %156) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc525)
+    %158 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc526)
+    %159 = "ttir.multiply"(%157, %arg19, %158) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc526)
+    %160 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc527)
+    %161 = "ttir.add"(%159, %arg20, %160) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<64x1x1xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc527)
+    %162 = tensor.empty() : tensor<1x64x56x56xf32> loc(#loc528)
+    %163 = "ttir.relu"(%161, %162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> loc(#loc528)
+    %164 = tensor.empty() : tensor<1x56x64x56xf32> loc(#loc529)
+    %165 = "ttir.transpose"(%163, %164) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x64x56x56xf32>, tensor<1x56x64x56xf32>) -> tensor<1x56x64x56xf32> loc(#loc529)
+    %166 = tensor.empty() : tensor<1x56x56x64xf32> loc(#loc530)
+    %167 = "ttir.transpose"(%165, %166) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x64x56xf32>, tensor<1x56x56x64xf32>) -> tensor<1x56x56x64xf32> loc(#loc530)
+    %168 = tensor.empty() : tensor<1x56x56x256xf32> loc(#loc531)
+    %169 = "ttir.conv2d"(%167, %arg117, %168) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x64xf32>, tensor<256x64x1x1xf32>, tensor<1x56x56x256xf32>) -> tensor<1x56x56x256xf32> loc(#loc531)
+    %170 = tensor.empty() : tensor<1x56x256x56xf32> loc(#loc532)
+    %171 = "ttir.transpose"(%169, %170) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x256xf32>, tensor<1x56x256x56xf32>) -> tensor<1x56x256x56xf32> loc(#loc532)
+    %172 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc533)
+    %173 = "ttir.transpose"(%171, %172) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x256x56xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc533)
+    %174 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc534)
+    %175 = "ttir.multiply"(%173, %arg21, %174) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<256x1x1xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc534)
+    %176 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc535)
+    %177 = "ttir.add"(%175, %arg22, %176) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<256x1x1xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc535)
+    %178 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc536)
+    %179 = "ttir.add"(%177, %131, %178) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc536)
+    %180 = tensor.empty() : tensor<1x256x56x56xf32> loc(#loc537)
+    %181 = "ttir.relu"(%179, %180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) -> tensor<1x256x56x56xf32> loc(#loc537)
+    %182 = tensor.empty() : tensor<1x56x256x56xf32> loc(#loc538)
+    %183 = "ttir.transpose"(%181, %182) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<1x56x256x56xf32>) -> tensor<1x56x256x56xf32> loc(#loc538)
+    %184 = tensor.empty() : tensor<1x56x56x256xf32> loc(#loc539)
+    %185 = "ttir.transpose"(%183, %184) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x256x56xf32>, tensor<1x56x56x256xf32>) -> tensor<1x56x56x256xf32> loc(#loc539)
+    %186 = tensor.empty() : tensor<1x56x56x128xf32> loc(#loc540)
+    %187 = "ttir.conv2d"(%185, %arg118, %186) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x256xf32>, tensor<128x256x1x1xf32>, tensor<1x56x56x128xf32>) -> tensor<1x56x56x128xf32> loc(#loc540)
+    %188 = tensor.empty() : tensor<1x56x128x56xf32> loc(#loc541)
+    %189 = "ttir.transpose"(%187, %188) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x56x128xf32>, tensor<1x56x128x56xf32>) -> tensor<1x56x128x56xf32> loc(#loc541)
+    %190 = tensor.empty() : tensor<1x128x56x56xf32> loc(#loc542)
+    %191 = "ttir.transpose"(%189, %190) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x128x56xf32>, tensor<1x128x56x56xf32>) -> tensor<1x128x56x56xf32> loc(#loc542)
+    %192 = tensor.empty() : tensor<1x128x56x56xf32> loc(#loc543)
+    %193 = "ttir.multiply"(%191, %arg23, %192) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x56x56xf32>, tensor<128x1x1xf32>, tensor<1x128x56x56xf32>) -> tensor<1x128x56x56xf32> loc(#loc543)
+    %194 = tensor.empty() : tensor<1x128x56x56xf32> loc(#loc544)
+    %195 = "ttir.add"(%193, %arg24, %194) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x56x56xf32>, tensor<128x1x1xf32>, tensor<1x128x56x56xf32>) -> tensor<1x128x56x56xf32> loc(#loc544)
+    %196 = tensor.empty() : tensor<1x128x56x56xf32> loc(#loc545)
+    %197 = "ttir.relu"(%195, %196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x56x56xf32>, tensor<1x128x56x56xf32>) -> tensor<1x128x56x56xf32> loc(#loc545)
+    %198 = tensor.empty() : tensor<1x56x128x56xf32> loc(#loc546)
+    %199 = "ttir.transpose"(%197, %198) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x56x56xf32>, tensor<1x56x128x56xf32>) -> tensor<1x56x128x56xf32> loc(#loc546)
+    %200 = tensor.empty() : tensor<1x56x56x128xf32> loc(#loc547)
+    %201 = "ttir.transpose"(%199, %200) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x128x56xf32>, tensor<1x56x56x128xf32>) -> tensor<1x56x56x128xf32> loc(#loc547)
+    %202 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc548)
+    %203 = "ttir.conv2d"(%201, %arg119, %202) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 2 : si32, stride_width = 2 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x128xf32>, tensor<128x128x3x3xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc548)
+    %204 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc549)
+    %205 = "ttir.transpose"(%203, %204) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x128xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc549)
+    %206 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc550)
+    %207 = "ttir.transpose"(%205, %206) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc550)
+    %208 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc551)
+    %209 = "ttir.multiply"(%207, %arg25, %208) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc551)
+    %210 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc552)
+    %211 = "ttir.add"(%209, %arg26, %210) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc552)
+    %212 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc553)
+    %213 = "ttir.relu"(%211, %212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc553)
+    %214 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc554)
+    %215 = "ttir.transpose"(%213, %214) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc554)
+    %216 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc555)
+    %217 = "ttir.transpose"(%215, %216) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc555)
+    %218 = tensor.empty() : tensor<1x28x28x512xf32> loc(#loc556)
+    %219 = "ttir.conv2d"(%217, %arg120, %218) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x128xf32>, tensor<512x128x1x1xf32>, tensor<1x28x28x512xf32>) -> tensor<1x28x28x512xf32> loc(#loc556)
+    %220 = tensor.empty() : tensor<1x28x512x28xf32> loc(#loc557)
+    %221 = "ttir.transpose"(%219, %220) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x512xf32>, tensor<1x28x512x28xf32>) -> tensor<1x28x512x28xf32> loc(#loc557)
+    %222 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc558)
+    %223 = "ttir.transpose"(%221, %222) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x512x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc558)
+    %224 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc559)
+    %225 = "ttir.multiply"(%223, %arg27, %224) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<512x1x1xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc559)
+    %226 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc560)
+    %227 = "ttir.add"(%225, %arg28, %226) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<512x1x1xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc560)
+    %228 = tensor.empty() : tensor<1x56x256x56xf32> loc(#loc561)
+    %229 = "ttir.transpose"(%181, %228) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x56x56xf32>, tensor<1x56x256x56xf32>) -> tensor<1x56x256x56xf32> loc(#loc561)
+    %230 = tensor.empty() : tensor<1x56x56x256xf32> loc(#loc562)
+    %231 = "ttir.transpose"(%229, %230) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x56x256x56xf32>, tensor<1x56x56x256xf32>) -> tensor<1x56x56x256xf32> loc(#loc562)
+    %232 = tensor.empty() : tensor<1x28x28x512xf32> loc(#loc563)
+    %233 = "ttir.conv2d"(%231, %arg121, %232) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 2 : si32, stride_width = 2 : si32}> {channel_last = 1 : si32} : (tensor<1x56x56x256xf32>, tensor<512x256x1x1xf32>, tensor<1x28x28x512xf32>) -> tensor<1x28x28x512xf32> loc(#loc563)
+    %234 = tensor.empty() : tensor<1x28x512x28xf32> loc(#loc564)
+    %235 = "ttir.transpose"(%233, %234) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x512xf32>, tensor<1x28x512x28xf32>) -> tensor<1x28x512x28xf32> loc(#loc564)
+    %236 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc565)
+    %237 = "ttir.transpose"(%235, %236) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x512x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc565)
+    %238 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc566)
+    %239 = "ttir.multiply"(%237, %arg29, %238) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<512x1x1xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc566)
+    %240 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc567)
+    %241 = "ttir.add"(%239, %arg30, %240) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<512x1x1xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc567)
+    %242 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc568)
+    %243 = "ttir.add"(%227, %241, %242) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc568)
+    %244 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc569)
+    %245 = "ttir.relu"(%243, %244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc569)
+    %246 = tensor.empty() : tensor<1x28x512x28xf32> loc(#loc570)
+    %247 = "ttir.transpose"(%245, %246) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x28x512x28xf32>) -> tensor<1x28x512x28xf32> loc(#loc570)
+    %248 = tensor.empty() : tensor<1x28x28x512xf32> loc(#loc571)
+    %249 = "ttir.transpose"(%247, %248) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x512x28xf32>, tensor<1x28x28x512xf32>) -> tensor<1x28x28x512xf32> loc(#loc571)
+    %250 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc572)
+    %251 = "ttir.conv2d"(%249, %arg122, %250) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x512xf32>, tensor<128x512x1x1xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc572)
+    %252 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc573)
+    %253 = "ttir.transpose"(%251, %252) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x128xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc573)
+    %254 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc574)
+    %255 = "ttir.transpose"(%253, %254) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc574)
+    %256 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc575)
+    %257 = "ttir.multiply"(%255, %arg31, %256) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc575)
+    %258 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc576)
+    %259 = "ttir.add"(%257, %arg32, %258) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc576)
+    %260 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc577)
+    %261 = "ttir.relu"(%259, %260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc577)
+    %262 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc578)
+    %263 = "ttir.transpose"(%261, %262) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc578)
+    %264 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc579)
+    %265 = "ttir.transpose"(%263, %264) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc579)
+    %266 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc580)
+    %267 = "ttir.conv2d"(%265, %arg123, %266) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x128xf32>, tensor<128x128x3x3xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc580)
+    %268 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc581)
+    %269 = "ttir.transpose"(%267, %268) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x128xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc581)
+    %270 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc582)
+    %271 = "ttir.transpose"(%269, %270) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc582)
+    %272 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc583)
+    %273 = "ttir.multiply"(%271, %arg33, %272) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc583)
+    %274 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc584)
+    %275 = "ttir.add"(%273, %arg34, %274) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc584)
+    %276 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc585)
+    %277 = "ttir.relu"(%275, %276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc585)
+    %278 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc586)
+    %279 = "ttir.transpose"(%277, %278) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc586)
+    %280 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc587)
+    %281 = "ttir.transpose"(%279, %280) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc587)
+    %282 = tensor.empty() : tensor<1x28x28x512xf32> loc(#loc588)
+    %283 = "ttir.conv2d"(%281, %arg124, %282) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x128xf32>, tensor<512x128x1x1xf32>, tensor<1x28x28x512xf32>) -> tensor<1x28x28x512xf32> loc(#loc588)
+    %284 = tensor.empty() : tensor<1x28x512x28xf32> loc(#loc589)
+    %285 = "ttir.transpose"(%283, %284) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x512xf32>, tensor<1x28x512x28xf32>) -> tensor<1x28x512x28xf32> loc(#loc589)
+    %286 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc590)
+    %287 = "ttir.transpose"(%285, %286) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x512x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc590)
+    %288 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc591)
+    %289 = "ttir.multiply"(%287, %arg35, %288) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<512x1x1xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc591)
+    %290 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc592)
+    %291 = "ttir.add"(%289, %arg36, %290) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<512x1x1xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc592)
+    %292 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc593)
+    %293 = "ttir.add"(%291, %245, %292) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc593)
+    %294 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc594)
+    %295 = "ttir.relu"(%293, %294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc594)
+    %296 = tensor.empty() : tensor<1x28x512x28xf32> loc(#loc595)
+    %297 = "ttir.transpose"(%295, %296) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x28x512x28xf32>) -> tensor<1x28x512x28xf32> loc(#loc595)
+    %298 = tensor.empty() : tensor<1x28x28x512xf32> loc(#loc596)
+    %299 = "ttir.transpose"(%297, %298) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x512x28xf32>, tensor<1x28x28x512xf32>) -> tensor<1x28x28x512xf32> loc(#loc596)
+    %300 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc597)
+    %301 = "ttir.conv2d"(%299, %arg125, %300) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x512xf32>, tensor<128x512x1x1xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc597)
+    %302 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc598)
+    %303 = "ttir.transpose"(%301, %302) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x128xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc598)
+    %304 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc599)
+    %305 = "ttir.transpose"(%303, %304) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc599)
+    %306 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc600)
+    %307 = "ttir.multiply"(%305, %arg37, %306) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc600)
+    %308 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc601)
+    %309 = "ttir.add"(%307, %arg38, %308) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc601)
+    %310 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc602)
+    %311 = "ttir.relu"(%309, %310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc602)
+    %312 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc603)
+    %313 = "ttir.transpose"(%311, %312) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc603)
+    %314 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc604)
+    %315 = "ttir.transpose"(%313, %314) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc604)
+    %316 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc605)
+    %317 = "ttir.conv2d"(%315, %arg126, %316) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x128xf32>, tensor<128x128x3x3xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc605)
+    %318 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc606)
+    %319 = "ttir.transpose"(%317, %318) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x128xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc606)
+    %320 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc607)
+    %321 = "ttir.transpose"(%319, %320) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc607)
+    %322 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc608)
+    %323 = "ttir.multiply"(%321, %arg39, %322) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc608)
+    %324 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc609)
+    %325 = "ttir.add"(%323, %arg40, %324) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc609)
+    %326 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc610)
+    %327 = "ttir.relu"(%325, %326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc610)
+    %328 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc611)
+    %329 = "ttir.transpose"(%327, %328) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc611)
+    %330 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc612)
+    %331 = "ttir.transpose"(%329, %330) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc612)
+    %332 = tensor.empty() : tensor<1x28x28x512xf32> loc(#loc613)
+    %333 = "ttir.conv2d"(%331, %arg127, %332) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x128xf32>, tensor<512x128x1x1xf32>, tensor<1x28x28x512xf32>) -> tensor<1x28x28x512xf32> loc(#loc613)
+    %334 = tensor.empty() : tensor<1x28x512x28xf32> loc(#loc614)
+    %335 = "ttir.transpose"(%333, %334) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x512xf32>, tensor<1x28x512x28xf32>) -> tensor<1x28x512x28xf32> loc(#loc614)
+    %336 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc615)
+    %337 = "ttir.transpose"(%335, %336) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x512x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc615)
+    %338 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc616)
+    %339 = "ttir.multiply"(%337, %arg41, %338) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<512x1x1xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc616)
+    %340 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc617)
+    %341 = "ttir.add"(%339, %arg42, %340) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<512x1x1xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc617)
+    %342 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc618)
+    %343 = "ttir.add"(%341, %295, %342) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc618)
+    %344 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc619)
+    %345 = "ttir.relu"(%343, %344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc619)
+    %346 = tensor.empty() : tensor<1x28x512x28xf32> loc(#loc620)
+    %347 = "ttir.transpose"(%345, %346) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x28x512x28xf32>) -> tensor<1x28x512x28xf32> loc(#loc620)
+    %348 = tensor.empty() : tensor<1x28x28x512xf32> loc(#loc621)
+    %349 = "ttir.transpose"(%347, %348) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x512x28xf32>, tensor<1x28x28x512xf32>) -> tensor<1x28x28x512xf32> loc(#loc621)
+    %350 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc622)
+    %351 = "ttir.conv2d"(%349, %arg128, %350) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x512xf32>, tensor<128x512x1x1xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc622)
+    %352 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc623)
+    %353 = "ttir.transpose"(%351, %352) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x128xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc623)
+    %354 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc624)
+    %355 = "ttir.transpose"(%353, %354) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc624)
+    %356 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc625)
+    %357 = "ttir.multiply"(%355, %arg43, %356) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc625)
+    %358 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc626)
+    %359 = "ttir.add"(%357, %arg44, %358) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc626)
+    %360 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc627)
+    %361 = "ttir.relu"(%359, %360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc627)
+    %362 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc628)
+    %363 = "ttir.transpose"(%361, %362) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc628)
+    %364 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc629)
+    %365 = "ttir.transpose"(%363, %364) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc629)
+    %366 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc630)
+    %367 = "ttir.conv2d"(%365, %arg129, %366) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x128xf32>, tensor<128x128x3x3xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc630)
+    %368 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc631)
+    %369 = "ttir.transpose"(%367, %368) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x128xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc631)
+    %370 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc632)
+    %371 = "ttir.transpose"(%369, %370) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc632)
+    %372 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc633)
+    %373 = "ttir.multiply"(%371, %arg45, %372) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc633)
+    %374 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc634)
+    %375 = "ttir.add"(%373, %arg46, %374) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<128x1x1xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc634)
+    %376 = tensor.empty() : tensor<1x128x28x28xf32> loc(#loc635)
+    %377 = "ttir.relu"(%375, %376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> loc(#loc635)
+    %378 = tensor.empty() : tensor<1x28x128x28xf32> loc(#loc636)
+    %379 = "ttir.transpose"(%377, %378) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x128x28x28xf32>, tensor<1x28x128x28xf32>) -> tensor<1x28x128x28xf32> loc(#loc636)
+    %380 = tensor.empty() : tensor<1x28x28x128xf32> loc(#loc637)
+    %381 = "ttir.transpose"(%379, %380) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x128x28xf32>, tensor<1x28x28x128xf32>) -> tensor<1x28x28x128xf32> loc(#loc637)
+    %382 = tensor.empty() : tensor<1x28x28x512xf32> loc(#loc638)
+    %383 = "ttir.conv2d"(%381, %arg130, %382) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x128xf32>, tensor<512x128x1x1xf32>, tensor<1x28x28x512xf32>) -> tensor<1x28x28x512xf32> loc(#loc638)
+    %384 = tensor.empty() : tensor<1x28x512x28xf32> loc(#loc639)
+    %385 = "ttir.transpose"(%383, %384) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x512xf32>, tensor<1x28x512x28xf32>) -> tensor<1x28x512x28xf32> loc(#loc639)
+    %386 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc640)
+    %387 = "ttir.transpose"(%385, %386) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x512x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc640)
+    %388 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc641)
+    %389 = "ttir.multiply"(%387, %arg47, %388) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<512x1x1xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc641)
+    %390 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc642)
+    %391 = "ttir.add"(%389, %arg48, %390) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<512x1x1xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc642)
+    %392 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc643)
+    %393 = "ttir.add"(%391, %345, %392) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc643)
+    %394 = tensor.empty() : tensor<1x512x28x28xf32> loc(#loc644)
+    %395 = "ttir.relu"(%393, %394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) -> tensor<1x512x28x28xf32> loc(#loc644)
+    %396 = tensor.empty() : tensor<1x28x512x28xf32> loc(#loc645)
+    %397 = "ttir.transpose"(%395, %396) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x28x512x28xf32>) -> tensor<1x28x512x28xf32> loc(#loc645)
+    %398 = tensor.empty() : tensor<1x28x28x512xf32> loc(#loc646)
+    %399 = "ttir.transpose"(%397, %398) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x512x28xf32>, tensor<1x28x28x512xf32>) -> tensor<1x28x28x512xf32> loc(#loc646)
+    %400 = tensor.empty() : tensor<1x28x28x256xf32> loc(#loc647)
+    %401 = "ttir.conv2d"(%399, %arg131, %400) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x512xf32>, tensor<256x512x1x1xf32>, tensor<1x28x28x256xf32>) -> tensor<1x28x28x256xf32> loc(#loc647)
+    %402 = tensor.empty() : tensor<1x28x256x28xf32> loc(#loc648)
+    %403 = "ttir.transpose"(%401, %402) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x28x256xf32>, tensor<1x28x256x28xf32>) -> tensor<1x28x256x28xf32> loc(#loc648)
+    %404 = tensor.empty() : tensor<1x256x28x28xf32> loc(#loc649)
+    %405 = "ttir.transpose"(%403, %404) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x256x28xf32>, tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> loc(#loc649)
+    %406 = tensor.empty() : tensor<1x256x28x28xf32> loc(#loc650)
+    %407 = "ttir.multiply"(%405, %arg49, %406) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x28x28xf32>, tensor<256x1x1xf32>, tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> loc(#loc650)
+    %408 = tensor.empty() : tensor<1x256x28x28xf32> loc(#loc651)
+    %409 = "ttir.add"(%407, %arg50, %408) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x28x28xf32>, tensor<256x1x1xf32>, tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> loc(#loc651)
+    %410 = tensor.empty() : tensor<1x256x28x28xf32> loc(#loc652)
+    %411 = "ttir.relu"(%409, %410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x28x28xf32>, tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> loc(#loc652)
+    %412 = tensor.empty() : tensor<1x28x256x28xf32> loc(#loc653)
+    %413 = "ttir.transpose"(%411, %412) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x28x28xf32>, tensor<1x28x256x28xf32>) -> tensor<1x28x256x28xf32> loc(#loc653)
+    %414 = tensor.empty() : tensor<1x28x28x256xf32> loc(#loc654)
+    %415 = "ttir.transpose"(%413, %414) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x256x28xf32>, tensor<1x28x28x256xf32>) -> tensor<1x28x28x256xf32> loc(#loc654)
+    %416 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc655)
+    %417 = "ttir.conv2d"(%415, %arg132, %416) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 2 : si32, stride_width = 2 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x256xf32>, tensor<256x256x3x3xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc655)
+    %418 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc656)
+    %419 = "ttir.transpose"(%417, %418) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc656)
+    %420 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc657)
+    %421 = "ttir.transpose"(%419, %420) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc657)
+    %422 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc658)
+    %423 = "ttir.multiply"(%421, %arg51, %422) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc658)
+    %424 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc659)
+    %425 = "ttir.add"(%423, %arg52, %424) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc659)
+    %426 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc660)
+    %427 = "ttir.relu"(%425, %426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc660)
+    %428 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc661)
+    %429 = "ttir.transpose"(%427, %428) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc661)
+    %430 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc662)
+    %431 = "ttir.transpose"(%429, %430) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc662)
+    %432 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc663)
+    %433 = "ttir.conv2d"(%431, %arg133, %432) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<1024x256x1x1xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc663)
+    %434 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc664)
+    %435 = "ttir.transpose"(%433, %434) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x1024xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc664)
+    %436 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc665)
+    %437 = "ttir.transpose"(%435, %436) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc665)
+    %438 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc666)
+    %439 = "ttir.multiply"(%437, %arg53, %438) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc666)
+    %440 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc667)
+    %441 = "ttir.add"(%439, %arg54, %440) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc667)
+    %442 = tensor.empty() : tensor<1x28x512x28xf32> loc(#loc668)
+    %443 = "ttir.transpose"(%395, %442) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x28x28xf32>, tensor<1x28x512x28xf32>) -> tensor<1x28x512x28xf32> loc(#loc668)
+    %444 = tensor.empty() : tensor<1x28x28x512xf32> loc(#loc669)
+    %445 = "ttir.transpose"(%443, %444) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x28x512x28xf32>, tensor<1x28x28x512xf32>) -> tensor<1x28x28x512xf32> loc(#loc669)
+    %446 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc670)
+    %447 = "ttir.conv2d"(%445, %arg134, %446) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 2 : si32, stride_width = 2 : si32}> {channel_last = 1 : si32} : (tensor<1x28x28x512xf32>, tensor<1024x512x1x1xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc670)
+    %448 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc671)
+    %449 = "ttir.transpose"(%447, %448) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x1024xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc671)
+    %450 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc672)
+    %451 = "ttir.transpose"(%449, %450) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc672)
+    %452 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc673)
+    %453 = "ttir.multiply"(%451, %arg55, %452) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc673)
+    %454 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc674)
+    %455 = "ttir.add"(%453, %arg56, %454) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc674)
+    %456 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc675)
+    %457 = "ttir.add"(%441, %455, %456) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc675)
+    %458 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc676)
+    %459 = "ttir.relu"(%457, %458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc676)
+    %460 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc677)
+    %461 = "ttir.transpose"(%459, %460) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc677)
+    %462 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc678)
+    %463 = "ttir.transpose"(%461, %462) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc678)
+    %464 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc679)
+    %465 = "ttir.conv2d"(%463, %arg135, %464) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x1024xf32>, tensor<256x1024x1x1xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc679)
+    %466 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc680)
+    %467 = "ttir.transpose"(%465, %466) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc680)
+    %468 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc681)
+    %469 = "ttir.transpose"(%467, %468) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc681)
+    %470 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc682)
+    %471 = "ttir.multiply"(%469, %arg57, %470) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc682)
+    %472 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc683)
+    %473 = "ttir.add"(%471, %arg58, %472) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc683)
+    %474 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc684)
+    %475 = "ttir.relu"(%473, %474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc684)
+    %476 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc685)
+    %477 = "ttir.transpose"(%475, %476) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc685)
+    %478 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc686)
+    %479 = "ttir.transpose"(%477, %478) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc686)
+    %480 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc687)
+    %481 = "ttir.conv2d"(%479, %arg136, %480) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<256x256x3x3xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc687)
+    %482 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc688)
+    %483 = "ttir.transpose"(%481, %482) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc688)
+    %484 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc689)
+    %485 = "ttir.transpose"(%483, %484) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc689)
+    %486 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc690)
+    %487 = "ttir.multiply"(%485, %arg59, %486) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc690)
+    %488 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc691)
+    %489 = "ttir.add"(%487, %arg60, %488) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc691)
+    %490 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc692)
+    %491 = "ttir.relu"(%489, %490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc692)
+    %492 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc693)
+    %493 = "ttir.transpose"(%491, %492) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc693)
+    %494 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc694)
+    %495 = "ttir.transpose"(%493, %494) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc694)
+    %496 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc695)
+    %497 = "ttir.conv2d"(%495, %arg137, %496) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<1024x256x1x1xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc695)
+    %498 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc696)
+    %499 = "ttir.transpose"(%497, %498) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x1024xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc696)
+    %500 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc697)
+    %501 = "ttir.transpose"(%499, %500) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc697)
+    %502 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc698)
+    %503 = "ttir.multiply"(%501, %arg61, %502) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc698)
+    %504 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc699)
+    %505 = "ttir.add"(%503, %arg62, %504) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc699)
+    %506 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc700)
+    %507 = "ttir.add"(%505, %459, %506) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc700)
+    %508 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc701)
+    %509 = "ttir.relu"(%507, %508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc701)
+    %510 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc702)
+    %511 = "ttir.transpose"(%509, %510) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc702)
+    %512 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc703)
+    %513 = "ttir.transpose"(%511, %512) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc703)
+    %514 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc704)
+    %515 = "ttir.conv2d"(%513, %arg138, %514) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x1024xf32>, tensor<256x1024x1x1xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc704)
+    %516 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc705)
+    %517 = "ttir.transpose"(%515, %516) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc705)
+    %518 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc706)
+    %519 = "ttir.transpose"(%517, %518) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc706)
+    %520 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc707)
+    %521 = "ttir.multiply"(%519, %arg63, %520) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc707)
+    %522 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc708)
+    %523 = "ttir.add"(%521, %arg64, %522) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc708)
+    %524 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc709)
+    %525 = "ttir.relu"(%523, %524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc709)
+    %526 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc710)
+    %527 = "ttir.transpose"(%525, %526) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc710)
+    %528 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc711)
+    %529 = "ttir.transpose"(%527, %528) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc711)
+    %530 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc712)
+    %531 = "ttir.conv2d"(%529, %arg139, %530) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<256x256x3x3xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc712)
+    %532 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc713)
+    %533 = "ttir.transpose"(%531, %532) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc713)
+    %534 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc714)
+    %535 = "ttir.transpose"(%533, %534) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc714)
+    %536 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc715)
+    %537 = "ttir.multiply"(%535, %arg65, %536) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc715)
+    %538 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc716)
+    %539 = "ttir.add"(%537, %arg66, %538) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc716)
+    %540 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc717)
+    %541 = "ttir.relu"(%539, %540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc717)
+    %542 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc718)
+    %543 = "ttir.transpose"(%541, %542) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc718)
+    %544 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc719)
+    %545 = "ttir.transpose"(%543, %544) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc719)
+    %546 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc720)
+    %547 = "ttir.conv2d"(%545, %arg140, %546) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<1024x256x1x1xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc720)
+    %548 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc721)
+    %549 = "ttir.transpose"(%547, %548) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x1024xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc721)
+    %550 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc722)
+    %551 = "ttir.transpose"(%549, %550) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc722)
+    %552 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc723)
+    %553 = "ttir.multiply"(%551, %arg67, %552) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc723)
+    %554 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc724)
+    %555 = "ttir.add"(%553, %arg68, %554) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc724)
+    %556 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc725)
+    %557 = "ttir.add"(%555, %509, %556) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc725)
+    %558 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc726)
+    %559 = "ttir.relu"(%557, %558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc726)
+    %560 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc727)
+    %561 = "ttir.transpose"(%559, %560) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc727)
+    %562 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc728)
+    %563 = "ttir.transpose"(%561, %562) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc728)
+    %564 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc729)
+    %565 = "ttir.conv2d"(%563, %arg141, %564) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x1024xf32>, tensor<256x1024x1x1xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc729)
+    %566 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc730)
+    %567 = "ttir.transpose"(%565, %566) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc730)
+    %568 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc731)
+    %569 = "ttir.transpose"(%567, %568) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc731)
+    %570 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc732)
+    %571 = "ttir.multiply"(%569, %arg69, %570) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc732)
+    %572 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc733)
+    %573 = "ttir.add"(%571, %arg70, %572) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc733)
+    %574 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc734)
+    %575 = "ttir.relu"(%573, %574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc734)
+    %576 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc735)
+    %577 = "ttir.transpose"(%575, %576) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc735)
+    %578 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc736)
+    %579 = "ttir.transpose"(%577, %578) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc736)
+    %580 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc737)
+    %581 = "ttir.conv2d"(%579, %arg142, %580) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<256x256x3x3xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc737)
+    %582 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc738)
+    %583 = "ttir.transpose"(%581, %582) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc738)
+    %584 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc739)
+    %585 = "ttir.transpose"(%583, %584) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc739)
+    %586 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc740)
+    %587 = "ttir.multiply"(%585, %arg71, %586) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc740)
+    %588 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc741)
+    %589 = "ttir.add"(%587, %arg72, %588) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc741)
+    %590 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc742)
+    %591 = "ttir.relu"(%589, %590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc742)
+    %592 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc743)
+    %593 = "ttir.transpose"(%591, %592) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc743)
+    %594 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc744)
+    %595 = "ttir.transpose"(%593, %594) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc744)
+    %596 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc745)
+    %597 = "ttir.conv2d"(%595, %arg143, %596) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<1024x256x1x1xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc745)
+    %598 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc746)
+    %599 = "ttir.transpose"(%597, %598) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x1024xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc746)
+    %600 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc747)
+    %601 = "ttir.transpose"(%599, %600) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc747)
+    %602 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc748)
+    %603 = "ttir.multiply"(%601, %arg73, %602) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc748)
+    %604 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc749)
+    %605 = "ttir.add"(%603, %arg74, %604) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc749)
+    %606 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc750)
+    %607 = "ttir.add"(%605, %559, %606) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc750)
+    %608 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc751)
+    %609 = "ttir.relu"(%607, %608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc751)
+    %610 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc752)
+    %611 = "ttir.transpose"(%609, %610) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc752)
+    %612 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc753)
+    %613 = "ttir.transpose"(%611, %612) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc753)
+    %614 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc754)
+    %615 = "ttir.conv2d"(%613, %arg144, %614) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x1024xf32>, tensor<256x1024x1x1xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc754)
+    %616 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc755)
+    %617 = "ttir.transpose"(%615, %616) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc755)
+    %618 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc756)
+    %619 = "ttir.transpose"(%617, %618) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc756)
+    %620 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc757)
+    %621 = "ttir.multiply"(%619, %arg75, %620) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc757)
+    %622 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc758)
+    %623 = "ttir.add"(%621, %arg76, %622) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc758)
+    %624 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc759)
+    %625 = "ttir.relu"(%623, %624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc759)
+    %626 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc760)
+    %627 = "ttir.transpose"(%625, %626) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc760)
+    %628 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc761)
+    %629 = "ttir.transpose"(%627, %628) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc761)
+    %630 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc762)
+    %631 = "ttir.conv2d"(%629, %arg145, %630) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<256x256x3x3xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc762)
+    %632 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc763)
+    %633 = "ttir.transpose"(%631, %632) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc763)
+    %634 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc764)
+    %635 = "ttir.transpose"(%633, %634) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc764)
+    %636 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc765)
+    %637 = "ttir.multiply"(%635, %arg77, %636) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc765)
+    %638 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc766)
+    %639 = "ttir.add"(%637, %arg78, %638) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc766)
+    %640 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc767)
+    %641 = "ttir.relu"(%639, %640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc767)
+    %642 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc768)
+    %643 = "ttir.transpose"(%641, %642) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc768)
+    %644 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc769)
+    %645 = "ttir.transpose"(%643, %644) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc769)
+    %646 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc770)
+    %647 = "ttir.conv2d"(%645, %arg146, %646) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<1024x256x1x1xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc770)
+    %648 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc771)
+    %649 = "ttir.transpose"(%647, %648) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x1024xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc771)
+    %650 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc772)
+    %651 = "ttir.transpose"(%649, %650) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc772)
+    %652 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc773)
+    %653 = "ttir.multiply"(%651, %arg79, %652) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc773)
+    %654 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc774)
+    %655 = "ttir.add"(%653, %arg80, %654) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc774)
+    %656 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc775)
+    %657 = "ttir.add"(%655, %609, %656) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc775)
+    %658 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc776)
+    %659 = "ttir.relu"(%657, %658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc776)
+    %660 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc777)
+    %661 = "ttir.transpose"(%659, %660) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc777)
+    %662 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc778)
+    %663 = "ttir.transpose"(%661, %662) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc778)
+    %664 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc779)
+    %665 = "ttir.conv2d"(%663, %arg147, %664) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x1024xf32>, tensor<256x1024x1x1xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc779)
+    %666 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc780)
+    %667 = "ttir.transpose"(%665, %666) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc780)
+    %668 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc781)
+    %669 = "ttir.transpose"(%667, %668) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc781)
+    %670 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc782)
+    %671 = "ttir.multiply"(%669, %arg81, %670) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc782)
+    %672 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc783)
+    %673 = "ttir.add"(%671, %arg82, %672) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc783)
+    %674 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc784)
+    %675 = "ttir.relu"(%673, %674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc784)
+    %676 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc785)
+    %677 = "ttir.transpose"(%675, %676) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc785)
+    %678 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc786)
+    %679 = "ttir.transpose"(%677, %678) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc786)
+    %680 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc787)
+    %681 = "ttir.conv2d"(%679, %arg148, %680) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<256x256x3x3xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc787)
+    %682 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc788)
+    %683 = "ttir.transpose"(%681, %682) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x256xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc788)
+    %684 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc789)
+    %685 = "ttir.transpose"(%683, %684) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc789)
+    %686 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc790)
+    %687 = "ttir.multiply"(%685, %arg83, %686) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc790)
+    %688 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc791)
+    %689 = "ttir.add"(%687, %arg84, %688) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<256x1x1xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc791)
+    %690 = tensor.empty() : tensor<1x256x14x14xf32> loc(#loc792)
+    %691 = "ttir.relu"(%689, %690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> loc(#loc792)
+    %692 = tensor.empty() : tensor<1x14x256x14xf32> loc(#loc793)
+    %693 = "ttir.transpose"(%691, %692) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256x14x14xf32>, tensor<1x14x256x14xf32>) -> tensor<1x14x256x14xf32> loc(#loc793)
+    %694 = tensor.empty() : tensor<1x14x14x256xf32> loc(#loc794)
+    %695 = "ttir.transpose"(%693, %694) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x256x14xf32>, tensor<1x14x14x256xf32>) -> tensor<1x14x14x256xf32> loc(#loc794)
+    %696 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc795)
+    %697 = "ttir.conv2d"(%695, %arg149, %696) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x256xf32>, tensor<1024x256x1x1xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc795)
+    %698 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc796)
+    %699 = "ttir.transpose"(%697, %698) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x1024xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc796)
+    %700 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc797)
+    %701 = "ttir.transpose"(%699, %700) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc797)
+    %702 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc798)
+    %703 = "ttir.multiply"(%701, %arg85, %702) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc798)
+    %704 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc799)
+    %705 = "ttir.add"(%703, %arg86, %704) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1024x1x1xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc799)
+    %706 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc800)
+    %707 = "ttir.add"(%705, %659, %706) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc800)
+    %708 = tensor.empty() : tensor<1x1024x14x14xf32> loc(#loc801)
+    %709 = "ttir.relu"(%707, %708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) -> tensor<1x1024x14x14xf32> loc(#loc801)
+    %710 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc802)
+    %711 = "ttir.transpose"(%709, %710) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc802)
+    %712 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc803)
+    %713 = "ttir.transpose"(%711, %712) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc803)
+    %714 = tensor.empty() : tensor<1x14x14x512xf32> loc(#loc804)
+    %715 = "ttir.conv2d"(%713, %arg150, %714) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x1024xf32>, tensor<512x1024x1x1xf32>, tensor<1x14x14x512xf32>) -> tensor<1x14x14x512xf32> loc(#loc804)
+    %716 = tensor.empty() : tensor<1x14x512x14xf32> loc(#loc805)
+    %717 = "ttir.transpose"(%715, %716) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x14x512xf32>, tensor<1x14x512x14xf32>) -> tensor<1x14x512x14xf32> loc(#loc805)
+    %718 = tensor.empty() : tensor<1x512x14x14xf32> loc(#loc806)
+    %719 = "ttir.transpose"(%717, %718) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x512x14xf32>, tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> loc(#loc806)
+    %720 = tensor.empty() : tensor<1x512x14x14xf32> loc(#loc807)
+    %721 = "ttir.multiply"(%719, %arg87, %720) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x14x14xf32>, tensor<512x1x1xf32>, tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> loc(#loc807)
+    %722 = tensor.empty() : tensor<1x512x14x14xf32> loc(#loc808)
+    %723 = "ttir.add"(%721, %arg88, %722) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x14x14xf32>, tensor<512x1x1xf32>, tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> loc(#loc808)
+    %724 = tensor.empty() : tensor<1x512x14x14xf32> loc(#loc809)
+    %725 = "ttir.relu"(%723, %724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x14x14xf32>, tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> loc(#loc809)
+    %726 = tensor.empty() : tensor<1x14x512x14xf32> loc(#loc810)
+    %727 = "ttir.transpose"(%725, %726) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x14x14xf32>, tensor<1x14x512x14xf32>) -> tensor<1x14x512x14xf32> loc(#loc810)
+    %728 = tensor.empty() : tensor<1x14x14x512xf32> loc(#loc811)
+    %729 = "ttir.transpose"(%727, %728) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x512x14xf32>, tensor<1x14x14x512xf32>) -> tensor<1x14x14x512xf32> loc(#loc811)
+    %730 = tensor.empty() : tensor<1x7x7x512xf32> loc(#loc812)
+    %731 = "ttir.conv2d"(%729, %arg151, %730) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 2 : si32, stride_width = 2 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x512xf32>, tensor<512x512x3x3xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc812)
+    %732 = tensor.empty() : tensor<1x7x512x7xf32> loc(#loc813)
+    %733 = "ttir.transpose"(%731, %732) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x7x512xf32>, tensor<1x7x512x7xf32>) -> tensor<1x7x512x7xf32> loc(#loc813)
+    %734 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc814)
+    %735 = "ttir.transpose"(%733, %734) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x512x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc814)
+    %736 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc815)
+    %737 = "ttir.multiply"(%735, %arg89, %736) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<512x1x1xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc815)
+    %738 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc816)
+    %739 = "ttir.add"(%737, %arg90, %738) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<512x1x1xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc816)
+    %740 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc817)
+    %741 = "ttir.relu"(%739, %740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc817)
+    %742 = tensor.empty() : tensor<1x7x512x7xf32> loc(#loc818)
+    %743 = "ttir.transpose"(%741, %742) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<1x7x512x7xf32>) -> tensor<1x7x512x7xf32> loc(#loc818)
+    %744 = tensor.empty() : tensor<1x7x7x512xf32> loc(#loc819)
+    %745 = "ttir.transpose"(%743, %744) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x512x7xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc819)
+    %746 = tensor.empty() : tensor<1x7x7x2048xf32> loc(#loc820)
+    %747 = "ttir.conv2d"(%745, %arg152, %746) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x7x7x512xf32>, tensor<2048x512x1x1xf32>, tensor<1x7x7x2048xf32>) -> tensor<1x7x7x2048xf32> loc(#loc820)
+    %748 = tensor.empty() : tensor<1x7x2048x7xf32> loc(#loc821)
+    %749 = "ttir.transpose"(%747, %748) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x7x2048xf32>, tensor<1x7x2048x7xf32>) -> tensor<1x7x2048x7xf32> loc(#loc821)
+    %750 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc822)
+    %751 = "ttir.transpose"(%749, %750) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x2048x7xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc822)
+    %752 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc823)
+    %753 = "ttir.multiply"(%751, %arg91, %752) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<2048x1x1xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc823)
+    %754 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc824)
+    %755 = "ttir.add"(%753, %arg92, %754) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<2048x1x1xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc824)
+    %756 = tensor.empty() : tensor<1x14x1024x14xf32> loc(#loc825)
+    %757 = "ttir.transpose"(%709, %756) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x1024x14x14xf32>, tensor<1x14x1024x14xf32>) -> tensor<1x14x1024x14xf32> loc(#loc825)
+    %758 = tensor.empty() : tensor<1x14x14x1024xf32> loc(#loc826)
+    %759 = "ttir.transpose"(%757, %758) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x14x1024x14xf32>, tensor<1x14x14x1024xf32>) -> tensor<1x14x14x1024xf32> loc(#loc826)
+    %760 = tensor.empty() : tensor<1x7x7x2048xf32> loc(#loc827)
+    %761 = "ttir.conv2d"(%759, %arg153, %760) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 2 : si32, stride_width = 2 : si32}> {channel_last = 1 : si32} : (tensor<1x14x14x1024xf32>, tensor<2048x1024x1x1xf32>, tensor<1x7x7x2048xf32>) -> tensor<1x7x7x2048xf32> loc(#loc827)
+    %762 = tensor.empty() : tensor<1x7x2048x7xf32> loc(#loc828)
+    %763 = "ttir.transpose"(%761, %762) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x7x2048xf32>, tensor<1x7x2048x7xf32>) -> tensor<1x7x2048x7xf32> loc(#loc828)
+    %764 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc829)
+    %765 = "ttir.transpose"(%763, %764) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x2048x7xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc829)
+    %766 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc830)
+    %767 = "ttir.multiply"(%765, %arg93, %766) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<2048x1x1xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc830)
+    %768 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc831)
+    %769 = "ttir.add"(%767, %arg94, %768) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<2048x1x1xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc831)
+    %770 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc832)
+    %771 = "ttir.add"(%755, %769, %770) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc832)
+    %772 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc833)
+    %773 = "ttir.relu"(%771, %772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc833)
+    %774 = tensor.empty() : tensor<1x7x2048x7xf32> loc(#loc834)
+    %775 = "ttir.transpose"(%773, %774) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<1x7x2048x7xf32>) -> tensor<1x7x2048x7xf32> loc(#loc834)
+    %776 = tensor.empty() : tensor<1x7x7x2048xf32> loc(#loc835)
+    %777 = "ttir.transpose"(%775, %776) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x2048x7xf32>, tensor<1x7x7x2048xf32>) -> tensor<1x7x7x2048xf32> loc(#loc835)
+    %778 = tensor.empty() : tensor<1x7x7x512xf32> loc(#loc836)
+    %779 = "ttir.conv2d"(%777, %arg154, %778) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x7x7x2048xf32>, tensor<512x2048x1x1xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc836)
+    %780 = tensor.empty() : tensor<1x7x512x7xf32> loc(#loc837)
+    %781 = "ttir.transpose"(%779, %780) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x7x512xf32>, tensor<1x7x512x7xf32>) -> tensor<1x7x512x7xf32> loc(#loc837)
+    %782 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc838)
+    %783 = "ttir.transpose"(%781, %782) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x512x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc838)
+    %784 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc839)
+    %785 = "ttir.multiply"(%783, %arg95, %784) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<512x1x1xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc839)
+    %786 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc840)
+    %787 = "ttir.add"(%785, %arg96, %786) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<512x1x1xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc840)
+    %788 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc841)
+    %789 = "ttir.relu"(%787, %788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc841)
+    %790 = tensor.empty() : tensor<1x7x512x7xf32> loc(#loc842)
+    %791 = "ttir.transpose"(%789, %790) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<1x7x512x7xf32>) -> tensor<1x7x512x7xf32> loc(#loc842)
+    %792 = tensor.empty() : tensor<1x7x7x512xf32> loc(#loc843)
+    %793 = "ttir.transpose"(%791, %792) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x512x7xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc843)
+    %794 = tensor.empty() : tensor<1x7x7x512xf32> loc(#loc844)
+    %795 = "ttir.conv2d"(%793, %arg155, %794) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x7x7x512xf32>, tensor<512x512x3x3xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc844)
+    %796 = tensor.empty() : tensor<1x7x512x7xf32> loc(#loc845)
+    %797 = "ttir.transpose"(%795, %796) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x7x512xf32>, tensor<1x7x512x7xf32>) -> tensor<1x7x512x7xf32> loc(#loc845)
+    %798 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc846)
+    %799 = "ttir.transpose"(%797, %798) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x512x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc846)
+    %800 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc847)
+    %801 = "ttir.multiply"(%799, %arg97, %800) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<512x1x1xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc847)
+    %802 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc848)
+    %803 = "ttir.add"(%801, %arg98, %802) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<512x1x1xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc848)
+    %804 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc849)
+    %805 = "ttir.relu"(%803, %804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc849)
+    %806 = tensor.empty() : tensor<1x7x512x7xf32> loc(#loc850)
+    %807 = "ttir.transpose"(%805, %806) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<1x7x512x7xf32>) -> tensor<1x7x512x7xf32> loc(#loc850)
+    %808 = tensor.empty() : tensor<1x7x7x512xf32> loc(#loc851)
+    %809 = "ttir.transpose"(%807, %808) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x512x7xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc851)
+    %810 = tensor.empty() : tensor<1x7x7x2048xf32> loc(#loc852)
+    %811 = "ttir.conv2d"(%809, %arg156, %810) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x7x7x512xf32>, tensor<2048x512x1x1xf32>, tensor<1x7x7x2048xf32>) -> tensor<1x7x7x2048xf32> loc(#loc852)
+    %812 = tensor.empty() : tensor<1x7x2048x7xf32> loc(#loc853)
+    %813 = "ttir.transpose"(%811, %812) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x7x2048xf32>, tensor<1x7x2048x7xf32>) -> tensor<1x7x2048x7xf32> loc(#loc853)
+    %814 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc854)
+    %815 = "ttir.transpose"(%813, %814) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x2048x7xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc854)
+    %816 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc855)
+    %817 = "ttir.multiply"(%815, %arg99, %816) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<2048x1x1xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc855)
+    %818 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc856)
+    %819 = "ttir.add"(%817, %arg100, %818) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<2048x1x1xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc856)
+    %820 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc857)
+    %821 = "ttir.add"(%819, %773, %820) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc857)
+    %822 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc858)
+    %823 = "ttir.relu"(%821, %822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc858)
+    %824 = tensor.empty() : tensor<1x7x2048x7xf32> loc(#loc859)
+    %825 = "ttir.transpose"(%823, %824) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<1x7x2048x7xf32>) -> tensor<1x7x2048x7xf32> loc(#loc859)
+    %826 = tensor.empty() : tensor<1x7x7x2048xf32> loc(#loc860)
+    %827 = "ttir.transpose"(%825, %826) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x2048x7xf32>, tensor<1x7x7x2048xf32>) -> tensor<1x7x7x2048xf32> loc(#loc860)
+    %828 = tensor.empty() : tensor<1x7x7x512xf32> loc(#loc861)
+    %829 = "ttir.conv2d"(%827, %arg157, %828) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x7x7x2048xf32>, tensor<512x2048x1x1xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc861)
+    %830 = tensor.empty() : tensor<1x7x512x7xf32> loc(#loc862)
+    %831 = "ttir.transpose"(%829, %830) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x7x512xf32>, tensor<1x7x512x7xf32>) -> tensor<1x7x512x7xf32> loc(#loc862)
+    %832 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc863)
+    %833 = "ttir.transpose"(%831, %832) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x512x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc863)
+    %834 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc864)
+    %835 = "ttir.multiply"(%833, %arg101, %834) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<512x1x1xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc864)
+    %836 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc865)
+    %837 = "ttir.add"(%835, %arg102, %836) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<512x1x1xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc865)
+    %838 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc866)
+    %839 = "ttir.relu"(%837, %838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc866)
+    %840 = tensor.empty() : tensor<1x7x512x7xf32> loc(#loc867)
+    %841 = "ttir.transpose"(%839, %840) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<1x7x512x7xf32>) -> tensor<1x7x512x7xf32> loc(#loc867)
+    %842 = tensor.empty() : tensor<1x7x7x512xf32> loc(#loc868)
+    %843 = "ttir.transpose"(%841, %842) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x512x7xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc868)
+    %844 = tensor.empty() : tensor<1x7x7x512xf32> loc(#loc869)
+    %845 = "ttir.conv2d"(%843, %arg158, %844) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 1 : si32, padding_left = 1 : si32, padding_right = 1 : si32, padding_top = 1 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x7x7x512xf32>, tensor<512x512x3x3xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc869)
+    %846 = tensor.empty() : tensor<1x7x512x7xf32> loc(#loc870)
+    %847 = "ttir.transpose"(%845, %846) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x7x512xf32>, tensor<1x7x512x7xf32>) -> tensor<1x7x512x7xf32> loc(#loc870)
+    %848 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc871)
+    %849 = "ttir.transpose"(%847, %848) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x512x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc871)
+    %850 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc872)
+    %851 = "ttir.multiply"(%849, %arg103, %850) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<512x1x1xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc872)
+    %852 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc873)
+    %853 = "ttir.add"(%851, %arg104, %852) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<512x1x1xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc873)
+    %854 = tensor.empty() : tensor<1x512x7x7xf32> loc(#loc874)
+    %855 = "ttir.relu"(%853, %854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> loc(#loc874)
+    %856 = tensor.empty() : tensor<1x7x512x7xf32> loc(#loc875)
+    %857 = "ttir.transpose"(%855, %856) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x512x7x7xf32>, tensor<1x7x512x7xf32>) -> tensor<1x7x512x7xf32> loc(#loc875)
+    %858 = tensor.empty() : tensor<1x7x7x512xf32> loc(#loc876)
+    %859 = "ttir.transpose"(%857, %858) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x512x7xf32>, tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32> loc(#loc876)
+    %860 = tensor.empty() : tensor<1x7x7x2048xf32> loc(#loc877)
+    %861 = "ttir.conv2d"(%859, %arg159, %860) <{dilation_height = 1 : si32, dilation_width = 1 : si32, groups = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device], padding_bottom = 0 : si32, padding_left = 0 : si32, padding_right = 0 : si32, padding_top = 0 : si32, stride_height = 1 : si32, stride_width = 1 : si32}> {channel_last = 1 : si32} : (tensor<1x7x7x512xf32>, tensor<2048x512x1x1xf32>, tensor<1x7x7x2048xf32>) -> tensor<1x7x7x2048xf32> loc(#loc877)
+    %862 = tensor.empty() : tensor<1x7x2048x7xf32> loc(#loc878)
+    %863 = "ttir.transpose"(%861, %862) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x7x2048xf32>, tensor<1x7x2048x7xf32>) -> tensor<1x7x2048x7xf32> loc(#loc878)
+    %864 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc879)
+    %865 = "ttir.transpose"(%863, %864) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x7x2048x7xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc879)
+    %866 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc880)
+    %867 = "ttir.multiply"(%865, %arg105, %866) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<2048x1x1xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc880)
+    %868 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc881)
+    %869 = "ttir.add"(%867, %arg106, %868) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<2048x1x1xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc881)
+    %870 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc882)
+    %871 = "ttir.add"(%869, %823, %870) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc882)
+    %872 = tensor.empty() : tensor<1x2048x7x7xf32> loc(#loc883)
+    %873 = "ttir.relu"(%871, %872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) -> tensor<1x2048x7x7xf32> loc(#loc883)
+    %874 = tensor.empty() : tensor<1x1x2048x49xf32> loc(#loc884)
+    %875 = "ttir.reshape"(%873, %874) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 1 : i32, 2048 : i32, 49 : i32]}> : (tensor<1x2048x7x7xf32>, tensor<1x1x2048x49xf32>) -> tensor<1x1x2048x49xf32> loc(#loc884)
+    %876 = tensor.empty() : tensor<1x1x49x2048xf32> loc(#loc885)
+    %877 = "ttir.transpose"(%875, %876) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x1x2048x49xf32>, tensor<1x1x49x2048xf32>) -> tensor<1x1x49x2048xf32> loc(#loc885)
+    %878 = tensor.empty() : tensor<1x1x1x2048xf32> loc(#loc886)
+    %879 = "ttir.mean"(%877, %878) <{keep_dim = true, operand_constraints = [#any_device, #any_device]}> {dim = -2 : si32} : (tensor<1x1x49x2048xf32>, tensor<1x1x1x2048xf32>) -> tensor<1x1x1x2048xf32> loc(#loc886)
+    %880 = tensor.empty() : tensor<1x2048x1x1xf32> loc(#loc887)
+    %881 = "ttir.reshape"(%879, %880) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 2048 : i32, 1 : i32, 1 : i32]}> : (tensor<1x1x1x2048xf32>, tensor<1x2048x1x1xf32>) -> tensor<1x2048x1x1xf32> loc(#loc887)
+    %882 = tensor.empty() : tensor<1x2048x1xf32> loc(#loc888)
+    %883 = "ttir.squeeze"(%881, %882) <{dim = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x2048x1x1xf32>, tensor<1x2048x1xf32>) -> tensor<1x2048x1xf32> loc(#loc888)
+    %884 = tensor.empty() : tensor<1x2048xf32> loc(#loc889)
+    %885 = "ttir.squeeze"(%883, %884) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x2048x1xf32>, tensor<1x2048xf32>) -> tensor<1x2048xf32> loc(#loc889)
+    %886 = tensor.empty() : tensor<1x1000xf32> loc(#loc890)
+    %887 = "ttir.matmul"(%885, %arg160, %886) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x2048xf32>, tensor<2048x1000xf32>, tensor<1x1000xf32>) -> tensor<1x1000xf32> loc(#loc890)
+    %888 = tensor.empty() : tensor<1x1000xf32> loc(#loc891)
+    %889 = "ttir.add"(%887, %arg161, %888) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1000xf32>, tensor<1000xf32>, tensor<1x1000xf32>) -> tensor<1x1000xf32> loc(#loc891)
+    return %889 : tensor<1x1000xf32> loc(#loc446)
+  } loc(#loc)
+} loc(#loc)
+#loc1 = loc("forward":4294967295:2951)
+#loc2 = loc("forward":4294967295:2952)
+#loc3 = loc("forward":4294967295:2954)
+#loc4 = loc("forward":4294967295:2955)
+#loc5 = loc("forward":4294967295:2956)
+#loc6 = loc("forward":4294967295:2958)
+#loc7 = loc("forward":4294967295:2960)
+#loc8 = loc("forward":4294967295:2961)
+#loc9 = loc("forward":4294967295:2962)
+#loc10 = loc("forward":4294967295:2963)
+#loc11 = loc("forward":4294967295:2964)
+#loc12 = loc("forward":4294967295:2966)
+#loc13 = loc("forward":4294967295:2967)
+#loc14 = loc("forward":4294967295:2968)
+#loc15 = loc("forward":4294967295:2970)
+#loc16 = loc("forward":4294967295:2972)
+#loc17 = loc("forward":4294967295:2973)
+#loc18 = loc("forward":4294967295:2974)
+#loc19 = loc("forward":4294967295:2975)
+#loc20 = loc("forward":4294967295:2977)
+#loc21 = loc("forward":4294967295:2978)
+#loc22 = loc("forward":4294967295:2979)
+#loc23 = loc("forward":4294967295:2981)
+#loc24 = loc("forward":4294967295:2983)
+#loc25 = loc("forward":4294967295:2984)
+#loc26 = loc("forward":4294967295:2985)
+#loc27 = loc("forward":4294967295:2986)
+#loc28 = loc("forward":4294967295:2988)
+#loc29 = loc("forward":4294967295:2989)
+#loc30 = loc("forward":4294967295:2990)
+#loc31 = loc("forward":4294967295:2992)
+#loc32 = loc("forward":4294967295:2994)
+#loc33 = loc("forward":4294967295:2995)
+#loc34 = loc("forward":4294967295:2996)
+#loc35 = loc("forward":4294967295:2998)
+#loc36 = loc("forward":4294967295:2999)
+#loc37 = loc("forward":4294967295:3000)
+#loc38 = loc("forward":4294967295:3002)
+#loc39 = loc("forward":4294967295:3004)
+#loc40 = loc("forward":4294967295:3005)
+#loc41 = loc("forward":4294967295:3006)
+#loc42 = loc("forward":4294967295:3007)
+#loc43 = loc("forward":4294967295:3008)
+#loc44 = loc("forward":4294967295:3010)
+#loc45 = loc("forward":4294967295:3011)
+#loc46 = loc("forward":4294967295:3012)
+#loc47 = loc("forward":4294967295:3014)
+#loc48 = loc("forward":4294967295:3016)
+#loc49 = loc("forward":4294967295:3017)
+#loc50 = loc("forward":4294967295:3018)
+#loc51 = loc("forward":4294967295:3019)
+#loc52 = loc("forward":4294967295:3021)
+#loc53 = loc("forward":4294967295:3022)
+#loc54 = loc("forward":4294967295:3023)
+#loc55 = loc("forward":4294967295:3025)
+#loc56 = loc("forward":4294967295:3027)
+#loc57 = loc("forward":4294967295:3028)
+#loc58 = loc("forward":4294967295:3029)
+#loc59 = loc("forward":4294967295:3030)
+#loc60 = loc("forward":4294967295:3032)
+#loc61 = loc("forward":4294967295:3033)
+#loc62 = loc("forward":4294967295:3034)
+#loc63 = loc("forward":4294967295:3036)
+#loc64 = loc("forward":4294967295:3038)
+#loc65 = loc("forward":4294967295:3039)
+#loc66 = loc("forward":4294967295:3040)
+#loc67 = loc("forward":4294967295:3041)
+#loc68 = loc("forward":4294967295:3042)
+#loc69 = loc("forward":4294967295:3044)
+#loc70 = loc("forward":4294967295:3045)
+#loc71 = loc("forward":4294967295:3046)
+#loc72 = loc("forward":4294967295:3048)
+#loc73 = loc("forward":4294967295:3050)
+#loc74 = loc("forward":4294967295:3051)
+#loc75 = loc("forward":4294967295:3052)
+#loc76 = loc("forward":4294967295:3053)
+#loc77 = loc("forward":4294967295:3055)
+#loc78 = loc("forward":4294967295:3056)
+#loc79 = loc("forward":4294967295:3057)
+#loc80 = loc("forward":4294967295:3059)
+#loc81 = loc("forward":4294967295:3061)
+#loc82 = loc("forward":4294967295:3062)
+#loc83 = loc("forward":4294967295:3063)
+#loc84 = loc("forward":4294967295:3064)
+#loc85 = loc("forward":4294967295:3066)
+#loc86 = loc("forward":4294967295:3067)
+#loc87 = loc("forward":4294967295:3068)
+#loc88 = loc("forward":4294967295:3070)
+#loc89 = loc("forward":4294967295:3072)
+#loc90 = loc("forward":4294967295:3073)
+#loc91 = loc("forward":4294967295:3074)
+#loc92 = loc("forward":4294967295:3075)
+#loc93 = loc("forward":4294967295:3076)
+#loc94 = loc("forward":4294967295:3078)
+#loc95 = loc("forward":4294967295:3079)
+#loc96 = loc("forward":4294967295:3080)
+#loc97 = loc("forward":4294967295:3082)
+#loc98 = loc("forward":4294967295:3084)
+#loc99 = loc("forward":4294967295:3085)
+#loc100 = loc("forward":4294967295:3086)
+#loc101 = loc("forward":4294967295:3087)
+#loc102 = loc("forward":4294967295:3089)
+#loc103 = loc("forward":4294967295:3090)
+#loc104 = loc("forward":4294967295:3091)
+#loc105 = loc("forward":4294967295:3093)
+#loc106 = loc("forward":4294967295:3095)
+#loc107 = loc("forward":4294967295:3096)
+#loc108 = loc("forward":4294967295:3097)
+#loc109 = loc("forward":4294967295:3098)
+#loc110 = loc("forward":4294967295:3100)
+#loc111 = loc("forward":4294967295:3101)
+#loc112 = loc("forward":4294967295:3102)
+#loc113 = loc("forward":4294967295:3104)
+#loc114 = loc("forward":4294967295:3106)
+#loc115 = loc("forward":4294967295:3107)
+#loc116 = loc("forward":4294967295:3108)
+#loc117 = loc("forward":4294967295:3110)
+#loc118 = loc("forward":4294967295:3111)
+#loc119 = loc("forward":4294967295:3112)
+#loc120 = loc("forward":4294967295:3114)
+#loc121 = loc("forward":4294967295:3116)
+#loc122 = loc("forward":4294967295:3117)
+#loc123 = loc("forward":4294967295:3118)
+#loc124 = loc("forward":4294967295:3119)
+#loc125 = loc("forward":4294967295:3120)
+#loc126 = loc("forward":4294967295:3122)
+#loc127 = loc("forward":4294967295:3123)
+#loc128 = loc("forward":4294967295:3124)
+#loc129 = loc("forward":4294967295:3126)
+#loc130 = loc("forward":4294967295:3128)
+#loc131 = loc("forward":4294967295:3129)
+#loc132 = loc("forward":4294967295:3130)
+#loc133 = loc("forward":4294967295:3131)
+#loc134 = loc("forward":4294967295:3133)
+#loc135 = loc("forward":4294967295:3134)
+#loc136 = loc("forward":4294967295:3135)
+#loc137 = loc("forward":4294967295:3137)
+#loc138 = loc("forward":4294967295:3139)
+#loc139 = loc("forward":4294967295:3140)
+#loc140 = loc("forward":4294967295:3141)
+#loc141 = loc("forward":4294967295:3142)
+#loc142 = loc("forward":4294967295:3144)
+#loc143 = loc("forward":4294967295:3145)
+#loc144 = loc("forward":4294967295:3146)
+#loc145 = loc("forward":4294967295:3148)
+#loc146 = loc("forward":4294967295:3150)
+#loc147 = loc("forward":4294967295:3151)
+#loc148 = loc("forward":4294967295:3152)
+#loc149 = loc("forward":4294967295:3153)
+#loc150 = loc("forward":4294967295:3154)
+#loc151 = loc("forward":4294967295:3156)
+#loc152 = loc("forward":4294967295:3157)
+#loc153 = loc("forward":4294967295:3158)
+#loc154 = loc("forward":4294967295:3160)
+#loc155 = loc("forward":4294967295:3162)
+#loc156 = loc("forward":4294967295:3163)
+#loc157 = loc("forward":4294967295:3164)
+#loc158 = loc("forward":4294967295:3165)
+#loc159 = loc("forward":4294967295:3167)
+#loc160 = loc("forward":4294967295:3168)
+#loc161 = loc("forward":4294967295:3169)
+#loc162 = loc("forward":4294967295:3171)
+#loc163 = loc("forward":4294967295:3173)
+#loc164 = loc("forward":4294967295:3174)
+#loc165 = loc("forward":4294967295:3175)
+#loc166 = loc("forward":4294967295:3176)
+#loc167 = loc("forward":4294967295:3178)
+#loc168 = loc("forward":4294967295:3179)
+#loc169 = loc("forward":4294967295:3180)
+#loc170 = loc("forward":4294967295:3182)
+#loc171 = loc("forward":4294967295:3184)
+#loc172 = loc("forward":4294967295:3185)
+#loc173 = loc("forward":4294967295:3186)
+#loc174 = loc("forward":4294967295:3187)
+#loc175 = loc("forward":4294967295:3188)
+#loc176 = loc("forward":4294967295:3190)
+#loc177 = loc("forward":4294967295:3191)
+#loc178 = loc("forward":4294967295:3192)
+#loc179 = loc("forward":4294967295:3194)
+#loc180 = loc("forward":4294967295:3196)
+#loc181 = loc("forward":4294967295:3197)
+#loc182 = loc("forward":4294967295:3198)
+#loc183 = loc("forward":4294967295:3199)
+#loc184 = loc("forward":4294967295:3201)
+#loc185 = loc("forward":4294967295:3202)
+#loc186 = loc("forward":4294967295:3203)
+#loc187 = loc("forward":4294967295:3205)
+#loc188 = loc("forward":4294967295:3207)
+#loc189 = loc("forward":4294967295:3208)
+#loc190 = loc("forward":4294967295:3209)
+#loc191 = loc("forward":4294967295:3210)
+#loc192 = loc("forward":4294967295:3212)
+#loc193 = loc("forward":4294967295:3213)
+#loc194 = loc("forward":4294967295:3214)
+#loc195 = loc("forward":4294967295:3216)
+#loc196 = loc("forward":4294967295:3218)
+#loc197 = loc("forward":4294967295:3219)
+#loc198 = loc("forward":4294967295:3220)
+#loc199 = loc("forward":4294967295:3221)
+#loc200 = loc("forward":4294967295:3222)
+#loc201 = loc("forward":4294967295:3224)
+#loc202 = loc("forward":4294967295:3225)
+#loc203 = loc("forward":4294967295:3226)
+#loc204 = loc("forward":4294967295:3228)
+#loc205 = loc("forward":4294967295:3230)
+#loc206 = loc("forward":4294967295:3231)
+#loc207 = loc("forward":4294967295:3232)
+#loc208 = loc("forward":4294967295:3233)
+#loc209 = loc("forward":4294967295:3235)
+#loc210 = loc("forward":4294967295:3236)
+#loc211 = loc("forward":4294967295:3237)
+#loc212 = loc("forward":4294967295:3239)
+#loc213 = loc("forward":4294967295:3241)
+#loc214 = loc("forward":4294967295:3242)
+#loc215 = loc("forward":4294967295:3243)
+#loc216 = loc("forward":4294967295:3244)
+#loc217 = loc("forward":4294967295:3246)
+#loc218 = loc("forward":4294967295:3247)
+#loc219 = loc("forward":4294967295:3248)
+#loc220 = loc("forward":4294967295:3250)
+#loc221 = loc("forward":4294967295:3252)
+#loc222 = loc("forward":4294967295:3253)
+#loc223 = loc("forward":4294967295:3254)
+#loc224 = loc("forward":4294967295:3256)
+#loc225 = loc("forward":4294967295:3257)
+#loc226 = loc("forward":4294967295:3258)
+#loc227 = loc("forward":4294967295:3260)
+#loc228 = loc("forward":4294967295:3262)
+#loc229 = loc("forward":4294967295:3263)
+#loc230 = loc("forward":4294967295:3264)
+#loc231 = loc("forward":4294967295:3265)
+#loc232 = loc("forward":4294967295:3266)
+#loc233 = loc("forward":4294967295:3268)
+#loc234 = loc("forward":4294967295:3269)
+#loc235 = loc("forward":4294967295:3270)
+#loc236 = loc("forward":4294967295:3272)
+#loc237 = loc("forward":4294967295:3274)
+#loc238 = loc("forward":4294967295:3275)
+#loc239 = loc("forward":4294967295:3276)
+#loc240 = loc("forward":4294967295:3277)
+#loc241 = loc("forward":4294967295:3279)
+#loc242 = loc("forward":4294967295:3280)
+#loc243 = loc("forward":4294967295:3281)
+#loc244 = loc("forward":4294967295:3283)
+#loc245 = loc("forward":4294967295:3285)
+#loc246 = loc("forward":4294967295:3286)
+#loc247 = loc("forward":4294967295:3287)
+#loc248 = loc("forward":4294967295:3288)
+#loc249 = loc("forward":4294967295:3290)
+#loc250 = loc("forward":4294967295:3291)
+#loc251 = loc("forward":4294967295:3292)
+#loc252 = loc("forward":4294967295:3294)
+#loc253 = loc("forward":4294967295:3296)
+#loc254 = loc("forward":4294967295:3297)
+#loc255 = loc("forward":4294967295:3298)
+#loc256 = loc("forward":4294967295:3299)
+#loc257 = loc("forward":4294967295:3300)
+#loc258 = loc("forward":4294967295:3302)
+#loc259 = loc("forward":4294967295:3303)
+#loc260 = loc("forward":4294967295:3304)
+#loc261 = loc("forward":4294967295:3306)
+#loc262 = loc("forward":4294967295:3308)
+#loc263 = loc("forward":4294967295:3309)
+#loc264 = loc("forward":4294967295:3310)
+#loc265 = loc("forward":4294967295:3311)
+#loc266 = loc("forward":4294967295:3313)
+#loc267 = loc("forward":4294967295:3314)
+#loc268 = loc("forward":4294967295:3315)
+#loc269 = loc("forward":4294967295:3317)
+#loc270 = loc("forward":4294967295:3319)
+#loc271 = loc("forward":4294967295:3320)
+#loc272 = loc("forward":4294967295:3321)
+#loc273 = loc("forward":4294967295:3322)
+#loc274 = loc("forward":4294967295:3324)
+#loc275 = loc("forward":4294967295:3325)
+#loc276 = loc("forward":4294967295:3326)
+#loc277 = loc("forward":4294967295:3328)
+#loc278 = loc("forward":4294967295:3330)
+#loc279 = loc("forward":4294967295:3331)
+#loc280 = loc("forward":4294967295:3332)
+#loc281 = loc("forward":4294967295:3333)
+#loc282 = loc("forward":4294967295:3334)
+#loc283 = loc("forward":4294967295:3336)
+#loc284 = loc("forward":4294967295:3337)
+#loc285 = loc("forward":4294967295:3338)
+#loc286 = loc("forward":4294967295:3340)
+#loc287 = loc("forward":4294967295:3342)
+#loc288 = loc("forward":4294967295:3343)
+#loc289 = loc("forward":4294967295:3344)
+#loc290 = loc("forward":4294967295:3345)
+#loc291 = loc("forward":4294967295:3347)
+#loc292 = loc("forward":4294967295:3348)
+#loc293 = loc("forward":4294967295:3349)
+#loc294 = loc("forward":4294967295:3351)
+#loc295 = loc("forward":4294967295:3353)
+#loc296 = loc("forward":4294967295:3354)
+#loc297 = loc("forward":4294967295:3355)
+#loc298 = loc("forward":4294967295:3356)
+#loc299 = loc("forward":4294967295:3358)
+#loc300 = loc("forward":4294967295:3359)
+#loc301 = loc("forward":4294967295:3360)
+#loc302 = loc("forward":4294967295:3362)
+#loc303 = loc("forward":4294967295:3364)
+#loc304 = loc("forward":4294967295:3365)
+#loc305 = loc("forward":4294967295:3366)
+#loc306 = loc("forward":4294967295:3367)
+#loc307 = loc("forward":4294967295:3368)
+#loc308 = loc("forward":4294967295:3370)
+#loc309 = loc("forward":4294967295:3371)
+#loc310 = loc("forward":4294967295:3372)
+#loc311 = loc("forward":4294967295:3374)
+#loc312 = loc("forward":4294967295:3376)
+#loc313 = loc("forward":4294967295:3377)
+#loc314 = loc("forward":4294967295:3378)
+#loc315 = loc("forward":4294967295:3379)
+#loc316 = loc("forward":4294967295:3381)
+#loc317 = loc("forward":4294967295:3382)
+#loc318 = loc("forward":4294967295:3383)
+#loc319 = loc("forward":4294967295:3385)
+#loc320 = loc("forward":4294967295:3387)
+#loc321 = loc("forward":4294967295:3388)
+#loc322 = loc("forward":4294967295:3389)
+#loc323 = loc("forward":4294967295:3390)
+#loc324 = loc("forward":4294967295:3392)
+#loc325 = loc("forward":4294967295:3393)
+#loc326 = loc("forward":4294967295:3394)
+#loc327 = loc("forward":4294967295:3396)
+#loc328 = loc("forward":4294967295:3398)
+#loc329 = loc("forward":4294967295:3399)
+#loc330 = loc("forward":4294967295:3400)
+#loc331 = loc("forward":4294967295:3401)
+#loc332 = loc("forward":4294967295:3402)
+#loc333 = loc("forward":4294967295:3404)
+#loc334 = loc("forward":4294967295:3405)
+#loc335 = loc("forward":4294967295:3406)
+#loc336 = loc("forward":4294967295:3408)
+#loc337 = loc("forward":4294967295:3410)
+#loc338 = loc("forward":4294967295:3411)
+#loc339 = loc("forward":4294967295:3412)
+#loc340 = loc("forward":4294967295:3413)
+#loc341 = loc("forward":4294967295:3415)
+#loc342 = loc("forward":4294967295:3416)
+#loc343 = loc("forward":4294967295:3417)
+#loc344 = loc("forward":4294967295:3419)
+#loc345 = loc("forward":4294967295:3421)
+#loc346 = loc("forward":4294967295:3422)
+#loc347 = loc("forward":4294967295:3423)
+#loc348 = loc("forward":4294967295:3424)
+#loc349 = loc("forward":4294967295:3426)
+#loc350 = loc("forward":4294967295:3427)
+#loc351 = loc("forward":4294967295:3428)
+#loc352 = loc("forward":4294967295:3430)
+#loc353 = loc("forward":4294967295:3432)
+#loc354 = loc("forward":4294967295:3433)
+#loc355 = loc("forward":4294967295:3434)
+#loc356 = loc("forward":4294967295:3435)
+#loc357 = loc("forward":4294967295:3436)
+#loc358 = loc("forward":4294967295:3438)
+#loc359 = loc("forward":4294967295:3439)
+#loc360 = loc("forward":4294967295:3440)
+#loc361 = loc("forward":4294967295:3442)
+#loc362 = loc("forward":4294967295:3444)
+#loc363 = loc("forward":4294967295:3445)
+#loc364 = loc("forward":4294967295:3446)
+#loc365 = loc("forward":4294967295:3447)
+#loc366 = loc("forward":4294967295:3449)
+#loc367 = loc("forward":4294967295:3450)
+#loc368 = loc("forward":4294967295:3451)
+#loc369 = loc("forward":4294967295:3453)
+#loc370 = loc("forward":4294967295:3455)
+#loc371 = loc("forward":4294967295:3456)
+#loc372 = loc("forward":4294967295:3457)
+#loc373 = loc("forward":4294967295:3458)
+#loc374 = loc("forward":4294967295:3460)
+#loc375 = loc("forward":4294967295:3461)
+#loc376 = loc("forward":4294967295:3462)
+#loc377 = loc("forward":4294967295:3464)
+#loc378 = loc("forward":4294967295:3466)
+#loc379 = loc("forward":4294967295:3467)
+#loc380 = loc("forward":4294967295:3468)
+#loc381 = loc("forward":4294967295:3470)
+#loc382 = loc("forward":4294967295:3471)
+#loc383 = loc("forward":4294967295:3472)
+#loc384 = loc("forward":4294967295:3474)
+#loc385 = loc("forward":4294967295:3476)
+#loc386 = loc("forward":4294967295:3477)
+#loc387 = loc("forward":4294967295:3478)
+#loc388 = loc("forward":4294967295:3479)
+#loc389 = loc("forward":4294967295:3480)
+#loc390 = loc("forward":4294967295:3482)
+#loc391 = loc("forward":4294967295:3483)
+#loc392 = loc("forward":4294967295:3484)
+#loc393 = loc("forward":4294967295:3486)
+#loc394 = loc("forward":4294967295:3488)
+#loc395 = loc("forward":4294967295:3489)
+#loc396 = loc("forward":4294967295:3490)
+#loc397 = loc("forward":4294967295:3491)
+#loc398 = loc("forward":4294967295:3493)
+#loc399 = loc("forward":4294967295:3494)
+#loc400 = loc("forward":4294967295:3495)
+#loc401 = loc("forward":4294967295:3497)
+#loc402 = loc("forward":4294967295:3499)
+#loc403 = loc("forward":4294967295:3500)
+#loc404 = loc("forward":4294967295:3501)
+#loc405 = loc("forward":4294967295:3502)
+#loc406 = loc("forward":4294967295:3504)
+#loc407 = loc("forward":4294967295:3505)
+#loc408 = loc("forward":4294967295:3506)
+#loc409 = loc("forward":4294967295:3508)
+#loc410 = loc("forward":4294967295:3510)
+#loc411 = loc("forward":4294967295:3511)
+#loc412 = loc("forward":4294967295:3512)
+#loc413 = loc("forward":4294967295:3513)
+#loc414 = loc("forward":4294967295:3514)
+#loc415 = loc("forward":4294967295:3516)
+#loc416 = loc("forward":4294967295:3517)
+#loc417 = loc("forward":4294967295:3518)
+#loc418 = loc("forward":4294967295:3520)
+#loc419 = loc("forward":4294967295:3522)
+#loc420 = loc("forward":4294967295:3523)
+#loc421 = loc("forward":4294967295:3524)
+#loc422 = loc("forward":4294967295:3525)
+#loc423 = loc("forward":4294967295:3527)
+#loc424 = loc("forward":4294967295:3528)
+#loc425 = loc("forward":4294967295:3529)
+#loc426 = loc("forward":4294967295:3531)
+#loc427 = loc("forward":4294967295:3533)
+#loc428 = loc("forward":4294967295:3534)
+#loc429 = loc("forward":4294967295:3535)
+#loc430 = loc("forward":4294967295:3536)
+#loc431 = loc("forward":4294967295:3538)
+#loc432 = loc("forward":4294967295:3539)
+#loc433 = loc("forward":4294967295:3540)
+#loc434 = loc("forward":4294967295:3542)
+#loc435 = loc("forward":4294967295:3544)
+#loc436 = loc("forward":4294967295:3545)
+#loc437 = loc("forward":4294967295:3546)
+#loc438 = loc("forward":4294967295:3547)
+#loc439 = loc("forward":4294967295:3548)
+#loc440 = loc("forward":4294967295:3549)
+#loc441 = loc("forward":4294967295:3550)
+#loc442 = loc("forward":4294967295:3551)
+#loc443 = loc("forward":4294967295:3552)
+#loc444 = loc("forward":4294967295:3554)
+#loc445 = loc("forward":4294967295:3556)
+#loc446 = loc(unknown)
+#loc447 = loc("conv2d_0.dc.transpose.0"(#loc1))
+#loc448 = loc("conv2d_0.dc.transpose.1"(#loc2))
+#loc449 = loc("conv2d_0.dc.conv2d.2"(#loc3))
+#loc450 = loc("conv2d_0.dc.transpose.3"(#loc4))
+#loc451 = loc("conv2d_0.dc.transpose.4"(#loc5))
+#loc452 = loc("multiply_8"(#loc6))
+#loc453 = loc("add_14"(#loc7))
+#loc454 = loc("relu_15"(#loc8))
+#loc455 = loc("max_pool2d_16"(#loc9))
+#loc456 = loc("conv2d_17.dc.transpose.0"(#loc10))
+#loc457 = loc("conv2d_17.dc.transpose.1"(#loc11))
+#loc458 = loc("conv2d_17.dc.conv2d.2"(#loc12))
+#loc459 = loc("conv2d_17.dc.transpose.3"(#loc13))
+#loc460 = loc("conv2d_17.dc.transpose.4"(#loc14))
+#loc461 = loc("multiply_25"(#loc15))
+#loc462 = loc("add_31"(#loc16))
+#loc463 = loc("relu_32"(#loc17))
+#loc464 = loc("conv2d_33.dc.transpose.0"(#loc18))
+#loc465 = loc("conv2d_33.dc.transpose.1"(#loc19))
+#loc466 = loc("conv2d_33.dc.conv2d.2"(#loc20))
+#loc467 = loc("conv2d_33.dc.transpose.3"(#loc21))
+#loc468 = loc("conv2d_33.dc.transpose.4"(#loc22))
+#loc469 = loc("multiply_41"(#loc23))
+#loc470 = loc("add_47"(#loc24))
+#loc471 = loc("relu_48"(#loc25))
+#loc472 = loc("conv2d_49.dc.transpose.0"(#loc26))
+#loc473 = loc("conv2d_49.dc.transpose.1"(#loc27))
+#loc474 = loc("conv2d_49.dc.conv2d.2"(#loc28))
+#loc475 = loc("conv2d_49.dc.transpose.3"(#loc29))
+#loc476 = loc("conv2d_49.dc.transpose.4"(#loc30))
+#loc477 = loc("multiply_57"(#loc31))
+#loc478 = loc("add_63"(#loc32))
+#loc479 = loc("conv2d_64.dc.transpose.0"(#loc33))
+#loc480 = loc("conv2d_64.dc.transpose.1"(#loc34))
+#loc481 = loc("conv2d_64.dc.conv2d.2"(#loc35))
+#loc482 = loc("conv2d_64.dc.transpose.3"(#loc36))
+#loc483 = loc("conv2d_64.dc.transpose.4"(#loc37))
+#loc484 = loc("multiply_72"(#loc38))
+#loc485 = loc("add_78"(#loc39))
+#loc486 = loc("add_79"(#loc40))
+#loc487 = loc("relu_80"(#loc41))
+#loc488 = loc("conv2d_81.dc.transpose.0"(#loc42))
+#loc489 = loc("conv2d_81.dc.transpose.1"(#loc43))
+#loc490 = loc("conv2d_81.dc.conv2d.2"(#loc44))
+#loc491 = loc("conv2d_81.dc.transpose.3"(#loc45))
+#loc492 = loc("conv2d_81.dc.transpose.4"(#loc46))
+#loc493 = loc("multiply_89"(#loc47))
+#loc494 = loc("add_95"(#loc48))
+#loc495 = loc("relu_96"(#loc49))
+#loc496 = loc("conv2d_97.dc.transpose.0"(#loc50))
+#loc497 = loc("conv2d_97.dc.transpose.1"(#loc51))
+#loc498 = loc("conv2d_97.dc.conv2d.2"(#loc52))
+#loc499 = loc("conv2d_97.dc.transpose.3"(#loc53))
+#loc500 = loc("conv2d_97.dc.transpose.4"(#loc54))
+#loc501 = loc("multiply_105"(#loc55))
+#loc502 = loc("add_111"(#loc56))
+#loc503 = loc("relu_112"(#loc57))
+#loc504 = loc("conv2d_113.dc.transpose.0"(#loc58))
+#loc505 = loc("conv2d_113.dc.transpose.1"(#loc59))
+#loc506 = loc("conv2d_113.dc.conv2d.2"(#loc60))
+#loc507 = loc("conv2d_113.dc.transpose.3"(#loc61))
+#loc508 = loc("conv2d_113.dc.transpose.4"(#loc62))
+#loc509 = loc("multiply_121"(#loc63))
+#loc510 = loc("add_127"(#loc64))
+#loc511 = loc("add_128"(#loc65))
+#loc512 = loc("relu_129"(#loc66))
+#loc513 = loc("conv2d_130.dc.transpose.0"(#loc67))
+#loc514 = loc("conv2d_130.dc.transpose.1"(#loc68))
+#loc515 = loc("conv2d_130.dc.conv2d.2"(#loc69))
+#loc516 = loc("conv2d_130.dc.transpose.3"(#loc70))
+#loc517 = loc("conv2d_130.dc.transpose.4"(#loc71))
+#loc518 = loc("multiply_138"(#loc72))
+#loc519 = loc("add_144"(#loc73))
+#loc520 = loc("relu_145"(#loc74))
+#loc521 = loc("conv2d_146.dc.transpose.0"(#loc75))
+#loc522 = loc("conv2d_146.dc.transpose.1"(#loc76))
+#loc523 = loc("conv2d_146.dc.conv2d.2"(#loc77))
+#loc524 = loc("conv2d_146.dc.transpose.3"(#loc78))
+#loc525 = loc("conv2d_146.dc.transpose.4"(#loc79))
+#loc526 = loc("multiply_154"(#loc80))
+#loc527 = loc("add_160"(#loc81))
+#loc528 = loc("relu_161"(#loc82))
+#loc529 = loc("conv2d_162.dc.transpose.0"(#loc83))
+#loc530 = loc("conv2d_162.dc.transpose.1"(#loc84))
+#loc531 = loc("conv2d_162.dc.conv2d.2"(#loc85))
+#loc532 = loc("conv2d_162.dc.transpose.3"(#loc86))
+#loc533 = loc("conv2d_162.dc.transpose.4"(#loc87))
+#loc534 = loc("multiply_170"(#loc88))
+#loc535 = loc("add_176"(#loc89))
+#loc536 = loc("add_177"(#loc90))
+#loc537 = loc("relu_178"(#loc91))
+#loc538 = loc("conv2d_179.dc.transpose.0"(#loc92))
+#loc539 = loc("conv2d_179.dc.transpose.1"(#loc93))
+#loc540 = loc("conv2d_179.dc.conv2d.2"(#loc94))
+#loc541 = loc("conv2d_179.dc.transpose.3"(#loc95))
+#loc542 = loc("conv2d_179.dc.transpose.4"(#loc96))
+#loc543 = loc("multiply_187"(#loc97))
+#loc544 = loc("add_193"(#loc98))
+#loc545 = loc("relu_194"(#loc99))
+#loc546 = loc("conv2d_195.dc.transpose.0"(#loc100))
+#loc547 = loc("conv2d_195.dc.transpose.1"(#loc101))
+#loc548 = loc("conv2d_195.dc.conv2d.2"(#loc102))
+#loc549 = loc("conv2d_195.dc.transpose.3"(#loc103))
+#loc550 = loc("conv2d_195.dc.transpose.4"(#loc104))
+#loc551 = loc("multiply_203"(#loc105))
+#loc552 = loc("add_209"(#loc106))
+#loc553 = loc("relu_210"(#loc107))
+#loc554 = loc("conv2d_211.dc.transpose.0"(#loc108))
+#loc555 = loc("conv2d_211.dc.transpose.1"(#loc109))
+#loc556 = loc("conv2d_211.dc.conv2d.2"(#loc110))
+#loc557 = loc("conv2d_211.dc.transpose.3"(#loc111))
+#loc558 = loc("conv2d_211.dc.transpose.4"(#loc112))
+#loc559 = loc("multiply_219"(#loc113))
+#loc560 = loc("add_225"(#loc114))
+#loc561 = loc("conv2d_226.dc.transpose.0"(#loc115))
+#loc562 = loc("conv2d_226.dc.transpose.1"(#loc116))
+#loc563 = loc("conv2d_226.dc.conv2d.2"(#loc117))
+#loc564 = loc("conv2d_226.dc.transpose.3"(#loc118))
+#loc565 = loc("conv2d_226.dc.transpose.4"(#loc119))
+#loc566 = loc("multiply_234"(#loc120))
+#loc567 = loc("add_240"(#loc121))
+#loc568 = loc("add_241"(#loc122))
+#loc569 = loc("relu_242"(#loc123))
+#loc570 = loc("conv2d_243.dc.transpose.0"(#loc124))
+#loc571 = loc("conv2d_243.dc.transpose.1"(#loc125))
+#loc572 = loc("conv2d_243.dc.conv2d.2"(#loc126))
+#loc573 = loc("conv2d_243.dc.transpose.3"(#loc127))
+#loc574 = loc("conv2d_243.dc.transpose.4"(#loc128))
+#loc575 = loc("multiply_251"(#loc129))
+#loc576 = loc("add_257"(#loc130))
+#loc577 = loc("relu_258"(#loc131))
+#loc578 = loc("conv2d_259.dc.transpose.0"(#loc132))
+#loc579 = loc("conv2d_259.dc.transpose.1"(#loc133))
+#loc580 = loc("conv2d_259.dc.conv2d.2"(#loc134))
+#loc581 = loc("conv2d_259.dc.transpose.3"(#loc135))
+#loc582 = loc("conv2d_259.dc.transpose.4"(#loc136))
+#loc583 = loc("multiply_267"(#loc137))
+#loc584 = loc("add_273"(#loc138))
+#loc585 = loc("relu_274"(#loc139))
+#loc586 = loc("conv2d_275.dc.transpose.0"(#loc140))
+#loc587 = loc("conv2d_275.dc.transpose.1"(#loc141))
+#loc588 = loc("conv2d_275.dc.conv2d.2"(#loc142))
+#loc589 = loc("conv2d_275.dc.transpose.3"(#loc143))
+#loc590 = loc("conv2d_275.dc.transpose.4"(#loc144))
+#loc591 = loc("multiply_283"(#loc145))
+#loc592 = loc("add_289"(#loc146))
+#loc593 = loc("add_290"(#loc147))
+#loc594 = loc("relu_291"(#loc148))
+#loc595 = loc("conv2d_292.dc.transpose.0"(#loc149))
+#loc596 = loc("conv2d_292.dc.transpose.1"(#loc150))
+#loc597 = loc("conv2d_292.dc.conv2d.2"(#loc151))
+#loc598 = loc("conv2d_292.dc.transpose.3"(#loc152))
+#loc599 = loc("conv2d_292.dc.transpose.4"(#loc153))
+#loc600 = loc("multiply_300"(#loc154))
+#loc601 = loc("add_306"(#loc155))
+#loc602 = loc("relu_307"(#loc156))
+#loc603 = loc("conv2d_308.dc.transpose.0"(#loc157))
+#loc604 = loc("conv2d_308.dc.transpose.1"(#loc158))
+#loc605 = loc("conv2d_308.dc.conv2d.2"(#loc159))
+#loc606 = loc("conv2d_308.dc.transpose.3"(#loc160))
+#loc607 = loc("conv2d_308.dc.transpose.4"(#loc161))
+#loc608 = loc("multiply_316"(#loc162))
+#loc609 = loc("add_322"(#loc163))
+#loc610 = loc("relu_323"(#loc164))
+#loc611 = loc("conv2d_324.dc.transpose.0"(#loc165))
+#loc612 = loc("conv2d_324.dc.transpose.1"(#loc166))
+#loc613 = loc("conv2d_324.dc.conv2d.2"(#loc167))
+#loc614 = loc("conv2d_324.dc.transpose.3"(#loc168))
+#loc615 = loc("conv2d_324.dc.transpose.4"(#loc169))
+#loc616 = loc("multiply_332"(#loc170))
+#loc617 = loc("add_338"(#loc171))
+#loc618 = loc("add_339"(#loc172))
+#loc619 = loc("relu_340"(#loc173))
+#loc620 = loc("conv2d_341.dc.transpose.0"(#loc174))
+#loc621 = loc("conv2d_341.dc.transpose.1"(#loc175))
+#loc622 = loc("conv2d_341.dc.conv2d.2"(#loc176))
+#loc623 = loc("conv2d_341.dc.transpose.3"(#loc177))
+#loc624 = loc("conv2d_341.dc.transpose.4"(#loc178))
+#loc625 = loc("multiply_349"(#loc179))
+#loc626 = loc("add_355"(#loc180))
+#loc627 = loc("relu_356"(#loc181))
+#loc628 = loc("conv2d_357.dc.transpose.0"(#loc182))
+#loc629 = loc("conv2d_357.dc.transpose.1"(#loc183))
+#loc630 = loc("conv2d_357.dc.conv2d.2"(#loc184))
+#loc631 = loc("conv2d_357.dc.transpose.3"(#loc185))
+#loc632 = loc("conv2d_357.dc.transpose.4"(#loc186))
+#loc633 = loc("multiply_365"(#loc187))
+#loc634 = loc("add_371"(#loc188))
+#loc635 = loc("relu_372"(#loc189))
+#loc636 = loc("conv2d_373.dc.transpose.0"(#loc190))
+#loc637 = loc("conv2d_373.dc.transpose.1"(#loc191))
+#loc638 = loc("conv2d_373.dc.conv2d.2"(#loc192))
+#loc639 = loc("conv2d_373.dc.transpose.3"(#loc193))
+#loc640 = loc("conv2d_373.dc.transpose.4"(#loc194))
+#loc641 = loc("multiply_381"(#loc195))
+#loc642 = loc("add_387"(#loc196))
+#loc643 = loc("add_388"(#loc197))
+#loc644 = loc("relu_389"(#loc198))
+#loc645 = loc("conv2d_390.dc.transpose.0"(#loc199))
+#loc646 = loc("conv2d_390.dc.transpose.1"(#loc200))
+#loc647 = loc("conv2d_390.dc.conv2d.2"(#loc201))
+#loc648 = loc("conv2d_390.dc.transpose.3"(#loc202))
+#loc649 = loc("conv2d_390.dc.transpose.4"(#loc203))
+#loc650 = loc("multiply_398"(#loc204))
+#loc651 = loc("add_404"(#loc205))
+#loc652 = loc("relu_405"(#loc206))
+#loc653 = loc("conv2d_406.dc.transpose.0"(#loc207))
+#loc654 = loc("conv2d_406.dc.transpose.1"(#loc208))
+#loc655 = loc("conv2d_406.dc.conv2d.2"(#loc209))
+#loc656 = loc("conv2d_406.dc.transpose.3"(#loc210))
+#loc657 = loc("conv2d_406.dc.transpose.4"(#loc211))
+#loc658 = loc("multiply_414"(#loc212))
+#loc659 = loc("add_420"(#loc213))
+#loc660 = loc("relu_421"(#loc214))
+#loc661 = loc("conv2d_422.dc.transpose.0"(#loc215))
+#loc662 = loc("conv2d_422.dc.transpose.1"(#loc216))
+#loc663 = loc("conv2d_422.dc.conv2d.2"(#loc217))
+#loc664 = loc("conv2d_422.dc.transpose.3"(#loc218))
+#loc665 = loc("conv2d_422.dc.transpose.4"(#loc219))
+#loc666 = loc("multiply_430"(#loc220))
+#loc667 = loc("add_436"(#loc221))
+#loc668 = loc("conv2d_437.dc.transpose.0"(#loc222))
+#loc669 = loc("conv2d_437.dc.transpose.1"(#loc223))
+#loc670 = loc("conv2d_437.dc.conv2d.2"(#loc224))
+#loc671 = loc("conv2d_437.dc.transpose.3"(#loc225))
+#loc672 = loc("conv2d_437.dc.transpose.4"(#loc226))
+#loc673 = loc("multiply_445"(#loc227))
+#loc674 = loc("add_451"(#loc228))
+#loc675 = loc("add_452"(#loc229))
+#loc676 = loc("relu_453"(#loc230))
+#loc677 = loc("conv2d_454.dc.transpose.0"(#loc231))
+#loc678 = loc("conv2d_454.dc.transpose.1"(#loc232))
+#loc679 = loc("conv2d_454.dc.conv2d.2"(#loc233))
+#loc680 = loc("conv2d_454.dc.transpose.3"(#loc234))
+#loc681 = loc("conv2d_454.dc.transpose.4"(#loc235))
+#loc682 = loc("multiply_462"(#loc236))
+#loc683 = loc("add_468"(#loc237))
+#loc684 = loc("relu_469"(#loc238))
+#loc685 = loc("conv2d_470.dc.transpose.0"(#loc239))
+#loc686 = loc("conv2d_470.dc.transpose.1"(#loc240))
+#loc687 = loc("conv2d_470.dc.conv2d.2"(#loc241))
+#loc688 = loc("conv2d_470.dc.transpose.3"(#loc242))
+#loc689 = loc("conv2d_470.dc.transpose.4"(#loc243))
+#loc690 = loc("multiply_478"(#loc244))
+#loc691 = loc("add_484"(#loc245))
+#loc692 = loc("relu_485"(#loc246))
+#loc693 = loc("conv2d_486.dc.transpose.0"(#loc247))
+#loc694 = loc("conv2d_486.dc.transpose.1"(#loc248))
+#loc695 = loc("conv2d_486.dc.conv2d.2"(#loc249))
+#loc696 = loc("conv2d_486.dc.transpose.3"(#loc250))
+#loc697 = loc("conv2d_486.dc.transpose.4"(#loc251))
+#loc698 = loc("multiply_494"(#loc252))
+#loc699 = loc("add_500"(#loc253))
+#loc700 = loc("add_501"(#loc254))
+#loc701 = loc("relu_502"(#loc255))
+#loc702 = loc("conv2d_503.dc.transpose.0"(#loc256))
+#loc703 = loc("conv2d_503.dc.transpose.1"(#loc257))
+#loc704 = loc("conv2d_503.dc.conv2d.2"(#loc258))
+#loc705 = loc("conv2d_503.dc.transpose.3"(#loc259))
+#loc706 = loc("conv2d_503.dc.transpose.4"(#loc260))
+#loc707 = loc("multiply_511"(#loc261))
+#loc708 = loc("add_517"(#loc262))
+#loc709 = loc("relu_518"(#loc263))
+#loc710 = loc("conv2d_519.dc.transpose.0"(#loc264))
+#loc711 = loc("conv2d_519.dc.transpose.1"(#loc265))
+#loc712 = loc("conv2d_519.dc.conv2d.2"(#loc266))
+#loc713 = loc("conv2d_519.dc.transpose.3"(#loc267))
+#loc714 = loc("conv2d_519.dc.transpose.4"(#loc268))
+#loc715 = loc("multiply_527"(#loc269))
+#loc716 = loc("add_533"(#loc270))
+#loc717 = loc("relu_534"(#loc271))
+#loc718 = loc("conv2d_535.dc.transpose.0"(#loc272))
+#loc719 = loc("conv2d_535.dc.transpose.1"(#loc273))
+#loc720 = loc("conv2d_535.dc.conv2d.2"(#loc274))
+#loc721 = loc("conv2d_535.dc.transpose.3"(#loc275))
+#loc722 = loc("conv2d_535.dc.transpose.4"(#loc276))
+#loc723 = loc("multiply_543"(#loc277))
+#loc724 = loc("add_549"(#loc278))
+#loc725 = loc("add_550"(#loc279))
+#loc726 = loc("relu_551"(#loc280))
+#loc727 = loc("conv2d_552.dc.transpose.0"(#loc281))
+#loc728 = loc("conv2d_552.dc.transpose.1"(#loc282))
+#loc729 = loc("conv2d_552.dc.conv2d.2"(#loc283))
+#loc730 = loc("conv2d_552.dc.transpose.3"(#loc284))
+#loc731 = loc("conv2d_552.dc.transpose.4"(#loc285))
+#loc732 = loc("multiply_560"(#loc286))
+#loc733 = loc("add_566"(#loc287))
+#loc734 = loc("relu_567"(#loc288))
+#loc735 = loc("conv2d_568.dc.transpose.0"(#loc289))
+#loc736 = loc("conv2d_568.dc.transpose.1"(#loc290))
+#loc737 = loc("conv2d_568.dc.conv2d.2"(#loc291))
+#loc738 = loc("conv2d_568.dc.transpose.3"(#loc292))
+#loc739 = loc("conv2d_568.dc.transpose.4"(#loc293))
+#loc740 = loc("multiply_576"(#loc294))
+#loc741 = loc("add_582"(#loc295))
+#loc742 = loc("relu_583"(#loc296))
+#loc743 = loc("conv2d_584.dc.transpose.0"(#loc297))
+#loc744 = loc("conv2d_584.dc.transpose.1"(#loc298))
+#loc745 = loc("conv2d_584.dc.conv2d.2"(#loc299))
+#loc746 = loc("conv2d_584.dc.transpose.3"(#loc300))
+#loc747 = loc("conv2d_584.dc.transpose.4"(#loc301))
+#loc748 = loc("multiply_592"(#loc302))
+#loc749 = loc("add_598"(#loc303))
+#loc750 = loc("add_599"(#loc304))
+#loc751 = loc("relu_600"(#loc305))
+#loc752 = loc("conv2d_601.dc.transpose.0"(#loc306))
+#loc753 = loc("conv2d_601.dc.transpose.1"(#loc307))
+#loc754 = loc("conv2d_601.dc.conv2d.2"(#loc308))
+#loc755 = loc("conv2d_601.dc.transpose.3"(#loc309))
+#loc756 = loc("conv2d_601.dc.transpose.4"(#loc310))
+#loc757 = loc("multiply_609"(#loc311))
+#loc758 = loc("add_615"(#loc312))
+#loc759 = loc("relu_616"(#loc313))
+#loc760 = loc("conv2d_617.dc.transpose.0"(#loc314))
+#loc761 = loc("conv2d_617.dc.transpose.1"(#loc315))
+#loc762 = loc("conv2d_617.dc.conv2d.2"(#loc316))
+#loc763 = loc("conv2d_617.dc.transpose.3"(#loc317))
+#loc764 = loc("conv2d_617.dc.transpose.4"(#loc318))
+#loc765 = loc("multiply_625"(#loc319))
+#loc766 = loc("add_631"(#loc320))
+#loc767 = loc("relu_632"(#loc321))
+#loc768 = loc("conv2d_633.dc.transpose.0"(#loc322))
+#loc769 = loc("conv2d_633.dc.transpose.1"(#loc323))
+#loc770 = loc("conv2d_633.dc.conv2d.2"(#loc324))
+#loc771 = loc("conv2d_633.dc.transpose.3"(#loc325))
+#loc772 = loc("conv2d_633.dc.transpose.4"(#loc326))
+#loc773 = loc("multiply_641"(#loc327))
+#loc774 = loc("add_647"(#loc328))
+#loc775 = loc("add_648"(#loc329))
+#loc776 = loc("relu_649"(#loc330))
+#loc777 = loc("conv2d_650.dc.transpose.0"(#loc331))
+#loc778 = loc("conv2d_650.dc.transpose.1"(#loc332))
+#loc779 = loc("conv2d_650.dc.conv2d.2"(#loc333))
+#loc780 = loc("conv2d_650.dc.transpose.3"(#loc334))
+#loc781 = loc("conv2d_650.dc.transpose.4"(#loc335))
+#loc782 = loc("multiply_658"(#loc336))
+#loc783 = loc("add_664"(#loc337))
+#loc784 = loc("relu_665"(#loc338))
+#loc785 = loc("conv2d_666.dc.transpose.0"(#loc339))
+#loc786 = loc("conv2d_666.dc.transpose.1"(#loc340))
+#loc787 = loc("conv2d_666.dc.conv2d.2"(#loc341))
+#loc788 = loc("conv2d_666.dc.transpose.3"(#loc342))
+#loc789 = loc("conv2d_666.dc.transpose.4"(#loc343))
+#loc790 = loc("multiply_674"(#loc344))
+#loc791 = loc("add_680"(#loc345))
+#loc792 = loc("relu_681"(#loc346))
+#loc793 = loc("conv2d_682.dc.transpose.0"(#loc347))
+#loc794 = loc("conv2d_682.dc.transpose.1"(#loc348))
+#loc795 = loc("conv2d_682.dc.conv2d.2"(#loc349))
+#loc796 = loc("conv2d_682.dc.transpose.3"(#loc350))
+#loc797 = loc("conv2d_682.dc.transpose.4"(#loc351))
+#loc798 = loc("multiply_690"(#loc352))
+#loc799 = loc("add_696"(#loc353))
+#loc800 = loc("add_697"(#loc354))
+#loc801 = loc("relu_698"(#loc355))
+#loc802 = loc("conv2d_699.dc.transpose.0"(#loc356))
+#loc803 = loc("conv2d_699.dc.transpose.1"(#loc357))
+#loc804 = loc("conv2d_699.dc.conv2d.2"(#loc358))
+#loc805 = loc("conv2d_699.dc.transpose.3"(#loc359))
+#loc806 = loc("conv2d_699.dc.transpose.4"(#loc360))
+#loc807 = loc("multiply_707"(#loc361))
+#loc808 = loc("add_713"(#loc362))
+#loc809 = loc("relu_714"(#loc363))
+#loc810 = loc("conv2d_715.dc.transpose.0"(#loc364))
+#loc811 = loc("conv2d_715.dc.transpose.1"(#loc365))
+#loc812 = loc("conv2d_715.dc.conv2d.2"(#loc366))
+#loc813 = loc("conv2d_715.dc.transpose.3"(#loc367))
+#loc814 = loc("conv2d_715.dc.transpose.4"(#loc368))
+#loc815 = loc("multiply_723"(#loc369))
+#loc816 = loc("add_729"(#loc370))
+#loc817 = loc("relu_730"(#loc371))
+#loc818 = loc("conv2d_731.dc.transpose.0"(#loc372))
+#loc819 = loc("conv2d_731.dc.transpose.1"(#loc373))
+#loc820 = loc("conv2d_731.dc.conv2d.2"(#loc374))
+#loc821 = loc("conv2d_731.dc.transpose.3"(#loc375))
+#loc822 = loc("conv2d_731.dc.transpose.4"(#loc376))
+#loc823 = loc("multiply_739"(#loc377))
+#loc824 = loc("add_745"(#loc378))
+#loc825 = loc("conv2d_746.dc.transpose.0"(#loc379))
+#loc826 = loc("conv2d_746.dc.transpose.1"(#loc380))
+#loc827 = loc("conv2d_746.dc.conv2d.2"(#loc381))
+#loc828 = loc("conv2d_746.dc.transpose.3"(#loc382))
+#loc829 = loc("conv2d_746.dc.transpose.4"(#loc383))
+#loc830 = loc("multiply_754"(#loc384))
+#loc831 = loc("add_760"(#loc385))
+#loc832 = loc("add_761"(#loc386))
+#loc833 = loc("relu_762"(#loc387))
+#loc834 = loc("conv2d_763.dc.transpose.0"(#loc388))
+#loc835 = loc("conv2d_763.dc.transpose.1"(#loc389))
+#loc836 = loc("conv2d_763.dc.conv2d.2"(#loc390))
+#loc837 = loc("conv2d_763.dc.transpose.3"(#loc391))
+#loc838 = loc("conv2d_763.dc.transpose.4"(#loc392))
+#loc839 = loc("multiply_771"(#loc393))
+#loc840 = loc("add_777"(#loc394))
+#loc841 = loc("relu_778"(#loc395))
+#loc842 = loc("conv2d_779.dc.transpose.0"(#loc396))
+#loc843 = loc("conv2d_779.dc.transpose.1"(#loc397))
+#loc844 = loc("conv2d_779.dc.conv2d.2"(#loc398))
+#loc845 = loc("conv2d_779.dc.transpose.3"(#loc399))
+#loc846 = loc("conv2d_779.dc.transpose.4"(#loc400))
+#loc847 = loc("multiply_787"(#loc401))
+#loc848 = loc("add_793"(#loc402))
+#loc849 = loc("relu_794"(#loc403))
+#loc850 = loc("conv2d_795.dc.transpose.0"(#loc404))
+#loc851 = loc("conv2d_795.dc.transpose.1"(#loc405))
+#loc852 = loc("conv2d_795.dc.conv2d.2"(#loc406))
+#loc853 = loc("conv2d_795.dc.transpose.3"(#loc407))
+#loc854 = loc("conv2d_795.dc.transpose.4"(#loc408))
+#loc855 = loc("multiply_803"(#loc409))
+#loc856 = loc("add_809"(#loc410))
+#loc857 = loc("add_810"(#loc411))
+#loc858 = loc("relu_811"(#loc412))
+#loc859 = loc("conv2d_812.dc.transpose.0"(#loc413))
+#loc860 = loc("conv2d_812.dc.transpose.1"(#loc414))
+#loc861 = loc("conv2d_812.dc.conv2d.2"(#loc415))
+#loc862 = loc("conv2d_812.dc.transpose.3"(#loc416))
+#loc863 = loc("conv2d_812.dc.transpose.4"(#loc417))
+#loc864 = loc("multiply_820"(#loc418))
+#loc865 = loc("add_826"(#loc419))
+#loc866 = loc("relu_827"(#loc420))
+#loc867 = loc("conv2d_828.dc.transpose.0"(#loc421))
+#loc868 = loc("conv2d_828.dc.transpose.1"(#loc422))
+#loc869 = loc("conv2d_828.dc.conv2d.2"(#loc423))
+#loc870 = loc("conv2d_828.dc.transpose.3"(#loc424))
+#loc871 = loc("conv2d_828.dc.transpose.4"(#loc425))
+#loc872 = loc("multiply_836"(#loc426))
+#loc873 = loc("add_842"(#loc427))
+#loc874 = loc("relu_843"(#loc428))
+#loc875 = loc("conv2d_844.dc.transpose.0"(#loc429))
+#loc876 = loc("conv2d_844.dc.transpose.1"(#loc430))
+#loc877 = loc("conv2d_844.dc.conv2d.2"(#loc431))
+#loc878 = loc("conv2d_844.dc.transpose.3"(#loc432))
+#loc879 = loc("conv2d_844.dc.transpose.4"(#loc433))
+#loc880 = loc("multiply_852"(#loc434))
+#loc881 = loc("add_858"(#loc435))
+#loc882 = loc("add_859"(#loc436))
+#loc883 = loc("relu_860"(#loc437))
+#loc884 = loc("avg_pool2d_861.dc.reshape.0"(#loc438))
+#loc885 = loc("avg_pool2d_861.dc.transpose.1.dc.transpose.0"(#loc439))
+#loc886 = loc("avg_pool2d_861.dc.reduce_avg.2"(#loc440))
+#loc887 = loc("avg_pool2d_861.dc.reshape.4"(#loc441))
+#loc888 = loc("squeeze_863"(#loc442))
+#loc889 = loc("squeeze_864"(#loc443))
+#loc890 = loc("matmul_866"(#loc444))
+#loc891 = loc("add_867"(#loc445))
diff --git a/tools/explorer/test/models/resnet_ttnn.mlir b/tools/explorer/test/models/resnet_ttnn.mlir
new file mode 100644
index 000000000..1b1343aa8
--- /dev/null
+++ b/tools/explorer/test/models/resnet_ttnn.mlir
@@ -0,0 +1,2102 @@
+
+#device = #tt.device<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>
+#dram = #tt.memory_space<dram>
+#loc = loc("ResNet":0:0)
+#system = #tt.memory_space<system>
+#system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,  1x0,  1x1,  1x2,  1x3,  1x4,  1x5,  1x6,  1x7,  2x0,  2x1,  2x2,  2x3,  2x4,  2x5,  2x6,  2x7,  3x0,  3x1,  3x2,  3x3,  3x4,  3x5,  3x6,  3x7,  4x0,  4x1,  4x2,  4x3,  4x4,  4x5,  4x6,  4x7,  5x0,  5x1,  5x2,  5x3,  5x4,  5x5,  5x6,  5x7,  6x0,  6x1,  6x2,  6x3,  6x4,  6x5,  6x6,  6x7,  7x0,  7x1,  7x2,  7x3,  7x4,  7x5,  7x6,  7x7] dram = [ 8x0,  9x0,  10x0,  8x1,  9x1,  10x1,  8x2,  9x2,  10x2,  8x3,  9x3,  10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>], supported_tile_sizes = [ 4x16,  16x16,  32x16,  4x32,  16x32,  32x32]}], [0], [3 : i32], [ 0x0x0x0]>
+#layout = #tt.layout<(d0, d1, d2, d3) -> (d0 * 672 + d1 * 224 + d2, d3), undef, <1x1>, memref<672x224xf32, #system>>
+#layout1 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<64x1xf32, #system>>
+#layout2 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<256x1xf32, #system>>
+#layout3 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<128x1xf32, #system>>
+#layout4 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<512x1xf32, #system>>
+#layout5 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<1024x1xf32, #system>>
+#layout6 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<2048x1xf32, #system>>
+#layout7 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 21 + d1 * 7 + d2, d3), undef, <1x1>, memref<1344x7xf32, #system>>
+#layout8 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 64 + d1 + d2, d3), undef, <1x1>, memref<4096x1xf32, #system>>
+#layout9 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 3 + d2, d3), undef, <1x1>, memref<12288x3xf32, #system>>
+#layout10 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 64 + d1 + d2, d3), undef, <1x1>, memref<16384x1xf32, #system>>
+#layout11 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 256 + d1 + d2, d3), undef, <1x1>, memref<16384x1xf32, #system>>
+#layout12 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 256 + d1 + d2, d3), undef, <1x1>, memref<32768x1xf32, #system>>
+#layout13 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 384 + d1 * 3 + d2, d3), undef, <1x1>, memref<49152x3xf32, #system>>
+#layout14 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 128 + d1 + d2, d3), undef, <1x1>, memref<65536x1xf32, #system>>
+#layout15 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 256 + d1 + d2, d3), undef, <1x1>, memref<131072x1xf32, #system>>
+#layout16 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 512 + d1 + d2, d3), undef, <1x1>, memref<65536x1xf32, #system>>
+#layout17 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 512 + d1 + d2, d3), undef, <1x1>, memref<131072x1xf32, #system>>
+#layout18 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 768 + d1 * 3 + d2, d3), undef, <1x1>, memref<196608x3xf32, #system>>
+#layout19 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 256 + d1 + d2, d3), undef, <1x1>, memref<262144x1xf32, #system>>
+#layout20 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 512 + d1 + d2, d3), undef, <1x1>, memref<524288x1xf32, #system>>
+#layout21 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 1024 + d1 + d2, d3), undef, <1x1>, memref<262144x1xf32, #system>>
+#layout22 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 1024 + d1 + d2, d3), undef, <1x1>, memref<524288x1xf32, #system>>
+#layout23 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 1536 + d1 * 3 + d2, d3), undef, <1x1>, memref<786432x3xf32, #system>>
+#layout24 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 512 + d1 + d2, d3), undef, <1x1>, memref<1048576x1xf32, #system>>
+#layout25 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 1024 + d1 + d2, d3), undef, <1x1>, memref<2097152x1xf32, #system>>
+#layout26 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), undef, <1x1>, memref<1048576x1xf32, #system>>
+#layout27 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2048x1000xf32, #system>>
+#layout28 = #tt.layout<(d0) -> (0, d0), undef, <1x1>, memref<1x1000xf32, #system>>
+#layout29 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<1x1000xf32, #system>>
+#layout30 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 672 + d1 * 224 + d2, d3), undef, <1x1>, memref<672x224xf32, #dram>, interleaved>
+#layout31 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 672 + d1 * 3 + d2, d3), undef, <1x1>, memref<672x224xf32, #dram>, interleaved>
+#layout32 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 50176 + d1 * 224 + d2, d3), undef, <1x1>, memref<50176x3xf32, #dram>, interleaved>
+#layout33 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 12544 + d1 * 112 + d2, d3), undef, <1x1>, memref<12544x64xf32, #dram>, interleaved>
+#layout34 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 7168 + d1 * 64 + d2, d3), undef, <1x1>, memref<7168x112xf32, #dram>, interleaved>
+#layout35 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 7168 + d1 * 112 + d2, d3), undef, <1x1>, memref<7168x112xf32, #dram>, interleaved>
+#layout36 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<64x1xf32, #dram>, interleaved>
+#layout37 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 7168 + d1 * 7168 + d2, d3), undef, <1x1>, memref<7168x112xf32, #dram>, interleaved>
+#layout38 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3584 + d1 * 3584 + d2, d3), undef, <1x1>, memref<3584x56xf32, #dram>, interleaved>
+#layout39 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3584 + d1 * 56 + d2, d3), undef, <1x1>, memref<3584x56xf32, #dram>, interleaved>
+#layout40 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3584 + d1 * 64 + d2, d3), undef, <1x1>, memref<3584x56xf32, #dram>, interleaved>
+#layout41 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3136 + d1 * 56 + d2, d3), undef, <1x1>, memref<3136x64xf32, #dram>, interleaved>
+#layout42 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3136 + d1 * 56 + d2, d3), undef, <1x1>, memref<3136x256xf32, #dram>, interleaved>
+#layout43 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 256 + d2, d3), undef, <1x1>, memref<14336x56xf32, #dram>, interleaved>
+#layout44 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 56 + d2, d3), undef, <1x1>, memref<14336x56xf32, #dram>, interleaved>
+#layout45 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<256x1xf32, #dram>, interleaved>
+#layout46 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3136 + d1 * 56 + d2, d3), undef, <1x1>, memref<3136x128xf32, #dram>, interleaved>
+#layout47 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 7168 + d1 * 128 + d2, d3), undef, <1x1>, memref<7168x56xf32, #dram>, interleaved>
+#layout48 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 7168 + d1 * 56 + d2, d3), undef, <1x1>, memref<7168x56xf32, #dram>, interleaved>
+#layout49 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<128x1xf32, #dram>, interleaved>
+#layout50 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 784 + d1 * 28 + d2, d3), undef, <1x1>, memref<784x128xf32, #dram>, interleaved>
+#layout51 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3584 + d1 * 128 + d2, d3), undef, <1x1>, memref<3584x28xf32, #dram>, interleaved>
+#layout52 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3584 + d1 * 28 + d2, d3), undef, <1x1>, memref<3584x28xf32, #dram>, interleaved>
+#layout53 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 784 + d1 * 28 + d2, d3), undef, <1x1>, memref<784x512xf32, #dram>, interleaved>
+#layout54 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 512 + d2, d3), undef, <1x1>, memref<14336x28xf32, #dram>, interleaved>
+#layout55 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 28 + d2, d3), undef, <1x1>, memref<14336x28xf32, #dram>, interleaved>
+#layout56 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<512x1xf32, #dram>, interleaved>
+#layout57 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 784 + d1 * 28 + d2, d3), undef, <1x1>, memref<784x256xf32, #dram>, interleaved>
+#layout58 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 7168 + d1 * 256 + d2, d3), undef, <1x1>, memref<7168x28xf32, #dram>, interleaved>
+#layout59 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 7168 + d1 * 28 + d2, d3), undef, <1x1>, memref<7168x28xf32, #dram>, interleaved>
+#layout60 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 196 + d1 * 14 + d2, d3), undef, <1x1>, memref<196x256xf32, #dram>, interleaved>
+#layout61 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3584 + d1 * 256 + d2, d3), undef, <1x1>, memref<3584x14xf32, #dram>, interleaved>
+#layout62 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3584 + d1 * 14 + d2, d3), undef, <1x1>, memref<3584x14xf32, #dram>, interleaved>
+#layout63 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 196 + d1 * 14 + d2, d3), undef, <1x1>, memref<196x1024xf32, #dram>, interleaved>
+#layout64 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 1024 + d2, d3), undef, <1x1>, memref<14336x14xf32, #dram>, interleaved>
+#layout65 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 14 + d2, d3), undef, <1x1>, memref<14336x14xf32, #dram>, interleaved>
+#layout66 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<1024x1xf32, #dram>, interleaved>
+#layout67 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 196 + d1 * 14 + d2, d3), undef, <1x1>, memref<196x512xf32, #dram>, interleaved>
+#layout68 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 7168 + d1 * 512 + d2, d3), undef, <1x1>, memref<7168x14xf32, #dram>, interleaved>
+#layout69 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 7168 + d1 * 14 + d2, d3), undef, <1x1>, memref<7168x14xf32, #dram>, interleaved>
+#layout70 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 49 + d1 * 7 + d2, d3), undef, <1x1>, memref<49x512xf32, #dram>, interleaved>
+#layout71 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3584 + d1 * 512 + d2, d3), undef, <1x1>, memref<3584x7xf32, #dram>, interleaved>
+#layout72 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3584 + d1 * 7 + d2, d3), undef, <1x1>, memref<3584x7xf32, #dram>, interleaved>
+#layout73 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 49 + d1 * 7 + d2, d3), undef, <1x1>, memref<49x2048xf32, #dram>, interleaved>
+#layout74 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 2048 + d2, d3), undef, <1x1>, memref<14336x7xf32, #dram>, interleaved>
+#layout75 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 7 + d2, d3), undef, <1x1>, memref<14336x7xf32, #dram>, interleaved>
+#layout76 = #tt.layout<(d0, d1, d2) -> (d0 + d1, d2), undef, <1x1>, memref<2048x1xf32, #dram>, interleaved>
+#layout77 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 * 2048 + d2, d3), undef, <1x1>, memref<2048x49xf32, #dram>, interleaved>
+#layout78 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 49 + d1 * 49 + d2, d3), undef, <1x1>, memref<49x2048xf32, #dram>, interleaved>
+#layout79 = #tt.layout<(d0, d1, d2, d3) -> (d0 + d1 + d2, d3), undef, <1x1>, memref<1x2048xf32, #dram>, interleaved>
+#layout80 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), undef, <1x1>, memref<2048x1xf32, #dram>, interleaved>
+#layout81 = #tt.layout<(d0, d1, d2) -> (d0 * 2048 + d1, d2), undef, <1x1>, memref<2048x1xf32, #dram>, interleaved>
+#layout82 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<1x2048xf32, #dram>, interleaved>
+#layout83 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2048x1000xf32, #dram>, interleaved>
+#layout84 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<1x1000xf32, #dram>, interleaved>
+#layout85 = #tt.layout<(d0) -> (0, d0), undef, <1x1>, memref<1x1000xf32, #dram>, interleaved>
+module @ResNet attributes {tt.device = #device, tt.system_desc = #system_desc} {
+  func.func @forward(%arg0: tensor<1x3x224x224xf32, #layout> {ttir.name = "input_1"} loc("ResNet":0:0), %arg1: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_1"} loc("ResNet":0:0), %arg2: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_1_fork_clone1229"} loc("ResNet":0:0), %arg3: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_18"} loc("ResNet":0:0), %arg4: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_18_fork_clone1271"} loc("ResNet":0:0), %arg5: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_34"} loc("ResNet":0:0), %arg6: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_34_fork_clone1204"} loc("ResNet":0:0), %arg7: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_50"} loc("ResNet":0:0), %arg8: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_50_fork_clone1108"} loc("ResNet":0:0), %arg9: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_65"} loc("ResNet":0:0), %arg10: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_65_fork_clone1112"} loc("ResNet":0:0), %arg11: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_82"} loc("ResNet":0:0), %arg12: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_82_fork_clone1238"} loc("ResNet":0:0), %arg13: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_98"} loc("ResNet":0:0), %arg14: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_98_fork_clone1152"} loc("ResNet":0:0), %arg15: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_114"} loc("ResNet":0:0), %arg16: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_114_fork_clone1051"} loc("ResNet":0:0), %arg17: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_131"} loc("ResNet":0:0), %arg18: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_131_fork_clone1192"} loc("ResNet":0:0), %arg19: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_147"} loc("ResNet":0:0), %arg20: tensor<64x1x1xf32, #layout1> {ttir.name = "input_1_add_147_fork_clone1096"} loc("ResNet":0:0), 
%arg21: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_163"} loc("ResNet":0:0), %arg22: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_163_fork_clone992"} loc("ResNet":0:0), %arg23: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_180"} loc("ResNet":0:0), %arg24: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_180_fork_clone1065"} loc("ResNet":0:0), %arg25: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_196"} loc("ResNet":0:0), %arg26: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_196_fork_clone962"} loc("ResNet":0:0), %arg27: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_212"} loc("ResNet":0:0), %arg28: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_212_fork_clone853"} loc("ResNet":0:0), %arg29: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_227"} loc("ResNet":0:0), %arg30: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_227_fork_clone857"} loc("ResNet":0:0), %arg31: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_244"} loc("ResNet":0:0), %arg32: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_244_fork_clone1007"} loc("ResNet":0:0), %arg33: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_260"} loc("ResNet":0:0), %arg34: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_260_fork_clone901"} loc("ResNet":0:0), %arg35: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_276"} loc("ResNet":0:0), %arg36: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_276_fork_clone791"} loc("ResNet":0:0), %arg37: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_293"} loc("ResNet":0:0), %arg38: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_293_fork_clone950"} loc("ResNet":0:0), %arg39: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_309"} loc("ResNet":0:0), %arg40: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_309_fork_clone841"} loc("ResNet":0:0), %arg41: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_325"} 
loc("ResNet":0:0), %arg42: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_325_fork_clone735"} loc("ResNet":0:0), %arg43: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_342"} loc("ResNet":0:0), %arg44: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_342_fork_clone889"} loc("ResNet":0:0), %arg45: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_358"} loc("ResNet":0:0), %arg46: tensor<128x1x1xf32, #layout3> {ttir.name = "input_1_add_358_fork_clone779"} loc("ResNet":0:0), %arg47: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_374"} loc("ResNet":0:0), %arg48: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_374_fork_clone677"} loc("ResNet":0:0), %arg49: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_391"} loc("ResNet":0:0), %arg50: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_391_fork_clone748"} loc("ResNet":0:0), %arg51: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_407"} loc("ResNet":0:0), %arg52: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_407_fork_clone645"} loc("ResNet":0:0), %arg53: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_423"} loc("ResNet":0:0), %arg54: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_423_fork_clone524"} loc("ResNet":0:0), %arg55: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_438"} loc("ResNet":0:0), %arg56: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_438_fork_clone528"} loc("ResNet":0:0), %arg57: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_455"} loc("ResNet":0:0), %arg58: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_455_fork_clone692"} loc("ResNet":0:0), %arg59: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_471"} loc("ResNet":0:0), %arg60: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_471_fork_clone580"} loc("ResNet":0:0), %arg61: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_487"} loc("ResNet":0:0), %arg62: tensor<1024x1x1xf32, #layout5> {ttir.name = 
"input_1_add_487_fork_clone453"} loc("ResNet":0:0), %arg63: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_504"} loc("ResNet":0:0), %arg64: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_504_fork_clone633"} loc("ResNet":0:0), %arg65: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_520"} loc("ResNet":0:0), %arg66: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_520_fork_clone512"} loc("ResNet":0:0), %arg67: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_536"} loc("ResNet":0:0), %arg68: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_536_fork_clone389"} loc("ResNet":0:0), %arg69: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_553"} loc("ResNet":0:0), %arg70: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_553_fork_clone568"} loc("ResNet":0:0), %arg71: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_569"} loc("ResNet":0:0), %arg72: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_569_fork_clone441"} loc("ResNet":0:0), %arg73: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_585"} loc("ResNet":0:0), %arg74: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_585_fork_clone329"} loc("ResNet":0:0), %arg75: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_602"} loc("ResNet":0:0), %arg76: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_602_fork_clone500"} loc("ResNet":0:0), %arg77: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_618"} loc("ResNet":0:0), %arg78: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_618_fork_clone377"} loc("ResNet":0:0), %arg79: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_634"} loc("ResNet":0:0), %arg80: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_634_fork_clone274"} loc("ResNet":0:0), %arg81: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_651"} loc("ResNet":0:0), %arg82: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_651_fork_clone429"} loc("ResNet":0:0), %arg83: 
tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_667"} loc("ResNet":0:0), %arg84: tensor<256x1x1xf32, #layout2> {ttir.name = "input_1_add_667_fork_clone317"} loc("ResNet":0:0), %arg85: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_683"} loc("ResNet":0:0), %arg86: tensor<1024x1x1xf32, #layout5> {ttir.name = "input_1_add_683_fork_clone219"} loc("ResNet":0:0), %arg87: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_700"} loc("ResNet":0:0), %arg88: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_700_fork_clone287"} loc("ResNet":0:0), %arg89: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_716"} loc("ResNet":0:0), %arg90: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_716_fork_clone190"} loc("ResNet":0:0), %arg91: tensor<2048x1x1xf32, #layout6> {ttir.name = "input_1_add_732"} loc("ResNet":0:0), %arg92: tensor<2048x1x1xf32, #layout6> {ttir.name = "input_1_add_732_fork_clone101"} loc("ResNet":0:0), %arg93: tensor<2048x1x1xf32, #layout6> {ttir.name = "input_1_add_747"} loc("ResNet":0:0), %arg94: tensor<2048x1x1xf32, #layout6> {ttir.name = "input_1_add_747_fork_clone105"} loc("ResNet":0:0), %arg95: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_764"} loc("ResNet":0:0), %arg96: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_764_fork_clone233"} loc("ResNet":0:0), %arg97: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_780"} loc("ResNet":0:0), %arg98: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_780_fork_clone138"} loc("ResNet":0:0), %arg99: tensor<2048x1x1xf32, #layout6> {ttir.name = "input_1_add_796"} loc("ResNet":0:0), %arg100: tensor<2048x1x1xf32, #layout6> {ttir.name = "input_1_add_796_fork_clone61"} loc("ResNet":0:0), %arg101: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_813"} loc("ResNet":0:0), %arg102: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_813_fork_clone178"} loc("ResNet":0:0), %arg103: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_829"} 
loc("ResNet":0:0), %arg104: tensor<512x1x1xf32, #layout4> {ttir.name = "input_1_add_829_fork_clone89"} loc("ResNet":0:0), %arg105: tensor<2048x1x1xf32, #layout6> {ttir.name = "input_1_add_845"} loc("ResNet":0:0), %arg106: tensor<2048x1x1xf32, #layout6> {ttir.name = "input_1_add_845_fork_clone32"} loc("ResNet":0:0), %arg107: tensor<64x3x7x7xf32, #layout7> {ttir.name = "conv1.weight"} loc("ResNet":0:0), %arg108: tensor<64x64x1x1xf32, #layout8> {ttir.name = "layer1.0.conv1.weight"} loc("ResNet":0:0), %arg109: tensor<64x64x3x3xf32, #layout9> {ttir.name = "layer1.0.conv2.weight"} loc("ResNet":0:0), %arg110: tensor<256x64x1x1xf32, #layout10> {ttir.name = "layer1.0.conv3.weight"} loc("ResNet":0:0), %arg111: tensor<256x64x1x1xf32, #layout10> {ttir.name = "layer1.0.downsample.0.weight"} loc("ResNet":0:0), %arg112: tensor<64x256x1x1xf32, #layout11> {ttir.name = "layer1.1.conv1.weight"} loc("ResNet":0:0), %arg113: tensor<64x64x3x3xf32, #layout9> {ttir.name = "layer1.1.conv2.weight"} loc("ResNet":0:0), %arg114: tensor<256x64x1x1xf32, #layout10> {ttir.name = "layer1.1.conv3.weight"} loc("ResNet":0:0), %arg115: tensor<64x256x1x1xf32, #layout11> {ttir.name = "layer1.2.conv1.weight"} loc("ResNet":0:0), %arg116: tensor<64x64x3x3xf32, #layout9> {ttir.name = "layer1.2.conv2.weight"} loc("ResNet":0:0), %arg117: tensor<256x64x1x1xf32, #layout10> {ttir.name = "layer1.2.conv3.weight"} loc("ResNet":0:0), %arg118: tensor<128x256x1x1xf32, #layout12> {ttir.name = "layer2.0.conv1.weight"} loc("ResNet":0:0), %arg119: tensor<128x128x3x3xf32, #layout13> {ttir.name = "layer2.0.conv2.weight"} loc("ResNet":0:0), %arg120: tensor<512x128x1x1xf32, #layout14> {ttir.name = "layer2.0.conv3.weight"} loc("ResNet":0:0), %arg121: tensor<512x256x1x1xf32, #layout15> {ttir.name = "layer2.0.downsample.0.weight"} loc("ResNet":0:0), %arg122: tensor<128x512x1x1xf32, #layout16> {ttir.name = "layer2.1.conv1.weight"} loc("ResNet":0:0), %arg123: tensor<128x128x3x3xf32, #layout13> {ttir.name = "layer2.1.conv2.weight"} 
loc("ResNet":0:0), %arg124: tensor<512x128x1x1xf32, #layout14> {ttir.name = "layer2.1.conv3.weight"} loc("ResNet":0:0), %arg125: tensor<128x512x1x1xf32, #layout16> {ttir.name = "layer2.2.conv1.weight"} loc("ResNet":0:0), %arg126: tensor<128x128x3x3xf32, #layout13> {ttir.name = "layer2.2.conv2.weight"} loc("ResNet":0:0), %arg127: tensor<512x128x1x1xf32, #layout14> {ttir.name = "layer2.2.conv3.weight"} loc("ResNet":0:0), %arg128: tensor<128x512x1x1xf32, #layout16> {ttir.name = "layer2.3.conv1.weight"} loc("ResNet":0:0), %arg129: tensor<128x128x3x3xf32, #layout13> {ttir.name = "layer2.3.conv2.weight"} loc("ResNet":0:0), %arg130: tensor<512x128x1x1xf32, #layout14> {ttir.name = "layer2.3.conv3.weight"} loc("ResNet":0:0), %arg131: tensor<256x512x1x1xf32, #layout17> {ttir.name = "layer3.0.conv1.weight"} loc("ResNet":0:0), %arg132: tensor<256x256x3x3xf32, #layout18> {ttir.name = "layer3.0.conv2.weight"} loc("ResNet":0:0), %arg133: tensor<1024x256x1x1xf32, #layout19> {ttir.name = "layer3.0.conv3.weight"} loc("ResNet":0:0), %arg134: tensor<1024x512x1x1xf32, #layout20> {ttir.name = "layer3.0.downsample.0.weight"} loc("ResNet":0:0), %arg135: tensor<256x1024x1x1xf32, #layout21> {ttir.name = "layer3.1.conv1.weight"} loc("ResNet":0:0), %arg136: tensor<256x256x3x3xf32, #layout18> {ttir.name = "layer3.1.conv2.weight"} loc("ResNet":0:0), %arg137: tensor<1024x256x1x1xf32, #layout19> {ttir.name = "layer3.1.conv3.weight"} loc("ResNet":0:0), %arg138: tensor<256x1024x1x1xf32, #layout21> {ttir.name = "layer3.2.conv1.weight"} loc("ResNet":0:0), %arg139: tensor<256x256x3x3xf32, #layout18> {ttir.name = "layer3.2.conv2.weight"} loc("ResNet":0:0), %arg140: tensor<1024x256x1x1xf32, #layout19> {ttir.name = "layer3.2.conv3.weight"} loc("ResNet":0:0), %arg141: tensor<256x1024x1x1xf32, #layout21> {ttir.name = "layer3.3.conv1.weight"} loc("ResNet":0:0), %arg142: tensor<256x256x3x3xf32, #layout18> {ttir.name = "layer3.3.conv2.weight"} loc("ResNet":0:0), %arg143: tensor<1024x256x1x1xf32, #layout19> 
{ttir.name = "layer3.3.conv3.weight"} loc("ResNet":0:0), %arg144: tensor<256x1024x1x1xf32, #layout21> {ttir.name = "layer3.4.conv1.weight"} loc("ResNet":0:0), %arg145: tensor<256x256x3x3xf32, #layout18> {ttir.name = "layer3.4.conv2.weight"} loc("ResNet":0:0), %arg146: tensor<1024x256x1x1xf32, #layout19> {ttir.name = "layer3.4.conv3.weight"} loc("ResNet":0:0), %arg147: tensor<256x1024x1x1xf32, #layout21> {ttir.name = "layer3.5.conv1.weight"} loc("ResNet":0:0), %arg148: tensor<256x256x3x3xf32, #layout18> {ttir.name = "layer3.5.conv2.weight"} loc("ResNet":0:0), %arg149: tensor<1024x256x1x1xf32, #layout19> {ttir.name = "layer3.5.conv3.weight"} loc("ResNet":0:0), %arg150: tensor<512x1024x1x1xf32, #layout22> {ttir.name = "layer4.0.conv1.weight"} loc("ResNet":0:0), %arg151: tensor<512x512x3x3xf32, #layout23> {ttir.name = "layer4.0.conv2.weight"} loc("ResNet":0:0), %arg152: tensor<2048x512x1x1xf32, #layout24> {ttir.name = "layer4.0.conv3.weight"} loc("ResNet":0:0), %arg153: tensor<2048x1024x1x1xf32, #layout25> {ttir.name = "layer4.0.downsample.0.weight"} loc("ResNet":0:0), %arg154: tensor<512x2048x1x1xf32, #layout26> {ttir.name = "layer4.1.conv1.weight"} loc("ResNet":0:0), %arg155: tensor<512x512x3x3xf32, #layout23> {ttir.name = "layer4.1.conv2.weight"} loc("ResNet":0:0), %arg156: tensor<2048x512x1x1xf32, #layout24> {ttir.name = "layer4.1.conv3.weight"} loc("ResNet":0:0), %arg157: tensor<512x2048x1x1xf32, #layout26> {ttir.name = "layer4.2.conv1.weight"} loc("ResNet":0:0), %arg158: tensor<512x512x3x3xf32, #layout23> {ttir.name = "layer4.2.conv2.weight"} loc("ResNet":0:0), %arg159: tensor<2048x512x1x1xf32, #layout24> {ttir.name = "layer4.2.conv3.weight"} loc("ResNet":0:0), %arg160: tensor<2048x1000xf32, #layout27> {ttir.name = "fc.weight"} loc("ResNet":0:0), %arg161: tensor<1000xf32, #layout28> {ttir.name = "fc.bias"} loc("ResNet":0:0)) -> (tensor<1x1000xf32, #layout29> {ttir.name = "ResNet.output_add_867"}) {
+    %0 = "ttnn.get_device"() <{mesh_shape = #ttnn<mesh_shape 1x1>}> : () -> !tt.device<#device> loc(#loc447)
+    %1 = "ttnn.to_layout"(%arg0, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1x3x224x224xf32, #layout>, !tt.device<#device>) -> tensor<1x3x224x224xf32, #layout30> loc(#loc447)
+    %2 = "ttnn.to_device"(%1, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1x3x224x224xf32, #layout30>, !tt.device<#device>) -> tensor<1x3x224x224xf32, #layout30> loc(#loc447)
+    %3 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x224x3x224>}> : (!tt.device<#device>) -> tensor<1x224x3x224xf32, #layout31> loc(#loc447)
+    %4 = "ttnn.transpose"(%2, %3) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x3x224x224xf32, #layout30>, tensor<1x224x3x224xf32, #layout31>) -> tensor<1x224x3x224xf32, #layout31> loc(#loc447)
+    %5 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x224x224x3>}> : (!tt.device<#device>) -> tensor<1x224x224x3xf32, #layout32> loc(#loc448)
+    %6 = "ttnn.transpose"(%4, %5) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x224x3x224xf32, #layout31>, tensor<1x224x224x3xf32, #layout32>) -> tensor<1x224x224x3xf32, #layout32> loc(#loc448)
+    %7 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x112x112x64>}> : (!tt.device<#device>) -> tensor<1x112x112x64xf32, #layout33> loc(#loc449)
+    %8 = "ttnn.conv2d"(%6, %arg107, %7, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 3 : i32, input_height = 224 : i32, input_width = 224 : i32, kernel_height = 7 : i32, kernel_width = 7 : i32, out_channels = 64 : i32, padding_height = 3 : i32, padding_width = 3 : i32, stride_height = 2 : i32, stride_width = 2 : i32}> : (tensor<1x224x224x3xf32, #layout32>, tensor<64x3x7x7xf32, #layout7>, tensor<1x112x112x64xf32, #layout33>, !tt.device<#device>) -> tensor<1x112x112x64xf32, #layout33> loc(#loc449)
+    %9 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x112x64x112>}> : (!tt.device<#device>) -> tensor<1x112x64x112xf32, #layout34> loc(#loc450)
+    %10 = "ttnn.transpose"(%8, %9) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x112x112x64xf32, #layout33>, tensor<1x112x64x112xf32, #layout34>) -> tensor<1x112x64x112xf32, #layout34> loc(#loc450)
+    %11 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x112x112>}> : (!tt.device<#device>) -> tensor<1x64x112x112xf32, #layout35> loc(#loc451)
+    %12 = "ttnn.transpose"(%10, %11) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x112x64x112xf32, #layout34>, tensor<1x64x112x112xf32, #layout35>) -> tensor<1x64x112x112xf32, #layout35> loc(#loc451)
+    %13 = "ttnn.to_layout"(%arg1, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc452)
+    %14 = "ttnn.to_device"(%13, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc452)
+    %15 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x112x112>}> : (!tt.device<#device>) -> tensor<1x64x112x112xf32, #layout35> loc(#loc452)
+    %16 = "ttnn.multiply"(%12, %14, %15) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x112x112xf32, #layout35>, tensor<64x1x1xf32, #layout36>, tensor<1x64x112x112xf32, #layout35>) -> tensor<1x64x112x112xf32, #layout35> loc(#loc452)
+    %17 = "ttnn.to_layout"(%arg2, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc453)
+    %18 = "ttnn.to_device"(%17, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc453)
+    %19 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x112x112>}> : (!tt.device<#device>) -> tensor<1x64x112x112xf32, #layout35> loc(#loc453)
+    %20 = "ttnn.add"(%16, %18, %19) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x112x112xf32, #layout35>, tensor<64x1x1xf32, #layout36>, tensor<1x64x112x112xf32, #layout35>) -> tensor<1x64x112x112xf32, #layout35> loc(#loc453)
+    %21 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x112x112>}> : (!tt.device<#device>) -> tensor<1x64x112x112xf32, #layout35> loc(#loc454)
+    %22 = "ttnn.relu"(%20, %21) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x64x112x112xf32, #layout35>, tensor<1x64x112x112xf32, #layout35>) -> tensor<1x64x112x112xf32, #layout35> loc(#loc454)
+    %23 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1x7168x112>}> : (!tt.device<#device>) -> tensor<1x1x7168x112xf32, #layout37> loc(#loc455)
+    %24 = "ttnn.reshape"(%22, %23) <{shape = [1 : i32, 1 : i32, 7168 : i32, 112 : i32]}> : (tensor<1x64x112x112xf32, #layout35>, tensor<1x1x7168x112xf32, #layout37>) -> tensor<1x1x7168x112xf32, #layout37> loc(#loc455)
+    %25 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1x3584x56>}> : (!tt.device<#device>) -> tensor<1x1x3584x56xf32, #layout38> loc(#loc455)
+    %26 = "ttnn.max_pool2d"(%24, %25, %0) <{batch_size = 1 : si32, ceil_mode = false, channels = 112 : si32, dilation_height = 1 : si32, dilation_width = 1 : si32, input_height = 64 : si32, input_width = 112 : si32, kernel_height = 3 : si32, kernel_width = 3 : si32, padding_height = 1 : si32, padding_width = 1 : si32, stride_height = 2 : si32, stride_width = 2 : si32}> : (tensor<1x1x7168x112xf32, #layout37>, tensor<1x1x3584x56xf32, #layout38>, !tt.device<#device>) -> tensor<1x1x3584x56xf32, #layout38> loc(#loc455)
+    %27 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc455)
+    %28 = "ttnn.reshape"(%26, %27) <{shape = [1 : i32, 64 : i32, 56 : i32, 56 : i32]}> : (tensor<1x1x3584x56xf32, #layout38>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc455)
+    %29 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc456)
+    %30 = "ttnn.transpose"(%28, %29) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc456)
+    %31 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc457)
+    %32 = "ttnn.transpose"(%30, %31) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x56x56x64xf32, #layout41>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc457)
+    %33 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc458)
+    %34 = "ttnn.conv2d"(%32, %arg108, %33, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 64 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 64 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<64x64x1x1xf32, #layout8>, tensor<1x56x56x64xf32, #layout41>, !tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc458)
+    %35 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc459)
+    %36 = "ttnn.transpose"(%34, %35) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc459)
+    %37 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc460)
+    %38 = "ttnn.transpose"(%36, %37) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc460)
+    %39 = "ttnn.to_layout"(%arg3, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc461)
+    %40 = "ttnn.to_device"(%39, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc461)
+    %41 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc461)
+    %42 = "ttnn.multiply"(%38, %40, %41) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc461)
+    %43 = "ttnn.to_layout"(%arg4, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc462)
+    %44 = "ttnn.to_device"(%43, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc462)
+    %45 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc462)
+    %46 = "ttnn.add"(%42, %44, %45) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc462)
+    %47 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc463)
+    %48 = "ttnn.relu"(%46, %47) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc463)
+    %49 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc464)
+    %50 = "ttnn.transpose"(%48, %49) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc464)
+    %51 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc465)
+    %52 = "ttnn.transpose"(%50, %51) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x56x56x64xf32, #layout41>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc465)
+    %53 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc466)
+    %54 = "ttnn.conv2d"(%52, %arg109, %53, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 64 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 64 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<64x64x3x3xf32, #layout9>, tensor<1x56x56x64xf32, #layout41>, !tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc466)
+    %55 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc467)
+    %56 = "ttnn.transpose"(%54, %55) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc467)
+    %57 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc468)
+    %58 = "ttnn.transpose"(%56, %57) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc468)
+    %59 = "ttnn.to_layout"(%arg5, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc469)
+    %60 = "ttnn.to_device"(%59, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc469)
+    %61 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc469)
+    %62 = "ttnn.multiply"(%58, %60, %61) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc469)
+    %63 = "ttnn.to_layout"(%arg6, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc470)
+    %64 = "ttnn.to_device"(%63, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc470)
+    %65 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc470)
+    %66 = "ttnn.add"(%62, %64, %65) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc470)
+    %67 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc471)
+    %68 = "ttnn.relu"(%66, %67) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc471)
+    %69 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc472)
+    %70 = "ttnn.transpose"(%68, %69) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc472)
+    %71 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc473)
+    %72 = "ttnn.transpose"(%70, %71) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x56x56x64xf32, #layout41>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc473)
+    %73 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x256>}> : (!tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc474)
+    %74 = "ttnn.conv2d"(%72, %arg110, %73, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 64 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 256 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<256x64x1x1xf32, #layout10>, tensor<1x56x56x256xf32, #layout42>, !tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc474)
+    %75 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x256x56>}> : (!tt.device<#device>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc475)
+    %76 = "ttnn.transpose"(%74, %75) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x256xf32, #layout42>, tensor<1x56x256x56xf32, #layout43>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc475)
+    %77 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc476)
+    %78 = "ttnn.transpose"(%76, %77) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x256x56xf32, #layout43>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc476)
+    %79 = "ttnn.to_layout"(%arg7, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc477)
+    %80 = "ttnn.to_device"(%79, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc477)
+    %81 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc477)
+    %82 = "ttnn.multiply"(%78, %80, %81) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<256x1x1xf32, #layout45>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc477)
+    %83 = "ttnn.to_layout"(%arg8, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc478)
+    %84 = "ttnn.to_device"(%83, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc478)
+    %85 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc478)
+    %86 = "ttnn.add"(%82, %84, %85) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<256x1x1xf32, #layout45>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc478)
+    %87 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc479)
+    %88 = "ttnn.transpose"(%28, %87) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc479)
+    %89 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc480)
+    %90 = "ttnn.transpose"(%88, %89) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x56x56x64xf32, #layout41>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc480)
+    %91 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x256>}> : (!tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc481)
+    %92 = "ttnn.conv2d"(%90, %arg111, %91, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 64 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 256 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<256x64x1x1xf32, #layout10>, tensor<1x56x56x256xf32, #layout42>, !tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc481)
+    %93 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x256x56>}> : (!tt.device<#device>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc482)
+    %94 = "ttnn.transpose"(%92, %93) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x256xf32, #layout42>, tensor<1x56x256x56xf32, #layout43>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc482)
+    %95 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc483)
+    %96 = "ttnn.transpose"(%94, %95) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x256x56xf32, #layout43>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc483)
+    %97 = "ttnn.to_layout"(%arg9, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc484)
+    %98 = "ttnn.to_device"(%97, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc484)
+    %99 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc484)
+    %100 = "ttnn.multiply"(%96, %98, %99) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<256x1x1xf32, #layout45>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc484)
+    %101 = "ttnn.to_layout"(%arg10, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc485)
+    %102 = "ttnn.to_device"(%101, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc485)
+    %103 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc485)
+    %104 = "ttnn.add"(%100, %102, %103) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<256x1x1xf32, #layout45>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc485)
+    %105 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc486)
+    %106 = "ttnn.add"(%86, %104, %105) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<1x256x56x56xf32, #layout44>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc486)
+    %107 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc487)
+    %108 = "ttnn.relu"(%106, %107) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc487)
+    %109 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x256x56>}> : (!tt.device<#device>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc488)
+    %110 = "ttnn.transpose"(%108, %109) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x56x56xf32, #layout44>, tensor<1x56x256x56xf32, #layout43>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc488)
+    %111 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x256>}> : (!tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc489)
+    %112 = "ttnn.transpose"(%110, %111) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x256x56xf32, #layout43>, tensor<1x56x56x256xf32, #layout42>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc489)
+    %113 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc490)
+    %114 = "ttnn.conv2d"(%112, %arg112, %113, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 64 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x256xf32, #layout42>, tensor<64x256x1x1xf32, #layout11>, tensor<1x56x56x64xf32, #layout41>, !tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc490)
+    %115 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc491)
+    %116 = "ttnn.transpose"(%114, %115) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc491)
+    %117 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc492)
+    %118 = "ttnn.transpose"(%116, %117) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc492)
+    %119 = "ttnn.to_layout"(%arg11, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc493)
+    %120 = "ttnn.to_device"(%119, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc493)
+    %121 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc493)
+    %122 = "ttnn.multiply"(%118, %120, %121) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc493)
+    %123 = "ttnn.to_layout"(%arg12, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc494)
+    %124 = "ttnn.to_device"(%123, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc494)
+    %125 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc494)
+    %126 = "ttnn.add"(%122, %124, %125) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc494)
+    %127 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc495)
+    %128 = "ttnn.relu"(%126, %127) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc495)
+    %129 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc496)
+    %130 = "ttnn.transpose"(%128, %129) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc496)
+    %131 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc497)
+    %132 = "ttnn.transpose"(%130, %131) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x56x56x64xf32, #layout41>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc497)
+    %133 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc498)
+    %134 = "ttnn.conv2d"(%132, %arg113, %133, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 64 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 64 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<64x64x3x3xf32, #layout9>, tensor<1x56x56x64xf32, #layout41>, !tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc498)
+    %135 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc499)
+    %136 = "ttnn.transpose"(%134, %135) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc499)
+    %137 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc500)
+    %138 = "ttnn.transpose"(%136, %137) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc500)
+    %139 = "ttnn.to_layout"(%arg13, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc501)
+    %140 = "ttnn.to_device"(%139, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc501)
+    %141 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc501)
+    %142 = "ttnn.multiply"(%138, %140, %141) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc501)
+    %143 = "ttnn.to_layout"(%arg14, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc502)
+    %144 = "ttnn.to_device"(%143, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc502)
+    %145 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc502)
+    %146 = "ttnn.add"(%142, %144, %145) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc502)
+    %147 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc503)
+    %148 = "ttnn.relu"(%146, %147) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc503)
+    %149 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc504)
+    %150 = "ttnn.transpose"(%148, %149) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc504)
+    %151 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc505)
+    %152 = "ttnn.transpose"(%150, %151) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x56x56x64xf32, #layout41>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc505)
+    %153 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x256>}> : (!tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc506)
+    %154 = "ttnn.conv2d"(%152, %arg114, %153, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 64 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 256 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<256x64x1x1xf32, #layout10>, tensor<1x56x56x256xf32, #layout42>, !tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc506)
+    %155 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x256x56>}> : (!tt.device<#device>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc507)
+    %156 = "ttnn.transpose"(%154, %155) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x256xf32, #layout42>, tensor<1x56x256x56xf32, #layout43>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc507)
+    %157 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc508)
+    %158 = "ttnn.transpose"(%156, %157) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x256x56xf32, #layout43>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc508)
+    %159 = "ttnn.to_layout"(%arg15, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc509)
+    %160 = "ttnn.to_device"(%159, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc509)
+    %161 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc509)
+    %162 = "ttnn.multiply"(%158, %160, %161) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<256x1x1xf32, #layout45>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc509)
+    %163 = "ttnn.to_layout"(%arg16, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc510)
+    %164 = "ttnn.to_device"(%163, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc510)
+    %165 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc510)
+    %166 = "ttnn.add"(%162, %164, %165) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<256x1x1xf32, #layout45>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc510)
+    %167 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc511)
+    %168 = "ttnn.add"(%166, %108, %167) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<1x256x56x56xf32, #layout44>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc511)
+    %169 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc512)
+    %170 = "ttnn.relu"(%168, %169) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc512)
+    %171 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x256x56>}> : (!tt.device<#device>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc513)
+    %172 = "ttnn.transpose"(%170, %171) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x56x56xf32, #layout44>, tensor<1x56x256x56xf32, #layout43>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc513)
+    %173 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x256>}> : (!tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc514)
+    %174 = "ttnn.transpose"(%172, %173) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x256x56xf32, #layout43>, tensor<1x56x56x256xf32, #layout42>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc514)
+    %175 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc515)
+    %176 = "ttnn.conv2d"(%174, %arg115, %175, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 64 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x256xf32, #layout42>, tensor<64x256x1x1xf32, #layout11>, tensor<1x56x56x64xf32, #layout41>, !tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc515)
+    %177 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc516)
+    %178 = "ttnn.transpose"(%176, %177) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc516)
+    %179 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc517)
+    %180 = "ttnn.transpose"(%178, %179) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc517)
+    %181 = "ttnn.to_layout"(%arg17, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc518)
+    %182 = "ttnn.to_device"(%181, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc518)
+    %183 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc518)
+    %184 = "ttnn.multiply"(%180, %182, %183) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc518)
+    %185 = "ttnn.to_layout"(%arg18, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc519)
+    %186 = "ttnn.to_device"(%185, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc519)
+    %187 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc519)
+    %188 = "ttnn.add"(%184, %186, %187) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc519)
+    %189 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc520)
+    %190 = "ttnn.relu"(%188, %189) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc520)
+    %191 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc521)
+    %192 = "ttnn.transpose"(%190, %191) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc521)
+    %193 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc522)
+    %194 = "ttnn.transpose"(%192, %193) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x56x56x64xf32, #layout41>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc522)
+    %195 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc523)
+    %196 = "ttnn.conv2d"(%194, %arg116, %195, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 64 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 64 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<64x64x3x3xf32, #layout9>, tensor<1x56x56x64xf32, #layout41>, !tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc523)
+    %197 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc524)
+    %198 = "ttnn.transpose"(%196, %197) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc524)
+    %199 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc525)
+    %200 = "ttnn.transpose"(%198, %199) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc525)
+    %201 = "ttnn.to_layout"(%arg19, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc526)
+    %202 = "ttnn.to_device"(%201, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc526)
+    %203 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc526)
+    %204 = "ttnn.multiply"(%200, %202, %203) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc526)
+    %205 = "ttnn.to_layout"(%arg20, %0) <{layout = #ttnn.layout<tile>}> : (tensor<64x1x1xf32, #layout1>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc527)
+    %206 = "ttnn.to_device"(%205, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<64x1x1xf32, #layout36>, !tt.device<#device>) -> tensor<64x1x1xf32, #layout36> loc(#loc527)
+    %207 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc527)
+    %208 = "ttnn.add"(%204, %206, %207) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<64x1x1xf32, #layout36>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc527)
+    %209 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x64x56x56>}> : (!tt.device<#device>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc528)
+    %210 = "ttnn.relu"(%208, %209) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x64x56x56xf32, #layout39>) -> tensor<1x64x56x56xf32, #layout39> loc(#loc528)
+    %211 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x64x56>}> : (!tt.device<#device>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc529)
+    %212 = "ttnn.transpose"(%210, %211) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x64x56x56xf32, #layout39>, tensor<1x56x64x56xf32, #layout40>) -> tensor<1x56x64x56xf32, #layout40> loc(#loc529)
+    %213 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x64>}> : (!tt.device<#device>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc530)
+    %214 = "ttnn.transpose"(%212, %213) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x64x56xf32, #layout40>, tensor<1x56x56x64xf32, #layout41>) -> tensor<1x56x56x64xf32, #layout41> loc(#loc530)
+    %215 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x256>}> : (!tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc531)
+    %216 = "ttnn.conv2d"(%214, %arg117, %215, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 64 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 256 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x64xf32, #layout41>, tensor<256x64x1x1xf32, #layout10>, tensor<1x56x56x256xf32, #layout42>, !tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc531)
+    %217 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x256x56>}> : (!tt.device<#device>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc532)
+    %218 = "ttnn.transpose"(%216, %217) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x256xf32, #layout42>, tensor<1x56x256x56xf32, #layout43>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc532)
+    %219 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc533)
+    %220 = "ttnn.transpose"(%218, %219) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x256x56xf32, #layout43>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc533)
+    %221 = "ttnn.to_layout"(%arg21, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc534)
+    %222 = "ttnn.to_device"(%221, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc534)
+    %223 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc534)
+    %224 = "ttnn.multiply"(%220, %222, %223) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<256x1x1xf32, #layout45>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc534)
+    %225 = "ttnn.to_layout"(%arg22, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc535)
+    %226 = "ttnn.to_device"(%225, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc535)
+    %227 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc535)
+    %228 = "ttnn.add"(%224, %226, %227) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<256x1x1xf32, #layout45>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc535)
+    %229 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc536)
+    %230 = "ttnn.add"(%228, %170, %229) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<1x256x56x56xf32, #layout44>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc536)
+    %231 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x56x56>}> : (!tt.device<#device>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc537)
+    %232 = "ttnn.relu"(%230, %231) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x56x56xf32, #layout44>, tensor<1x256x56x56xf32, #layout44>) -> tensor<1x256x56x56xf32, #layout44> loc(#loc537)
+    %233 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x256x56>}> : (!tt.device<#device>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc538)
+    %234 = "ttnn.transpose"(%232, %233) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x56x56xf32, #layout44>, tensor<1x56x256x56xf32, #layout43>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc538)
+    %235 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x256>}> : (!tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc539)
+    %236 = "ttnn.transpose"(%234, %235) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x256x56xf32, #layout43>, tensor<1x56x56x256xf32, #layout42>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc539)
+    %237 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x128>}> : (!tt.device<#device>) -> tensor<1x56x56x128xf32, #layout46> loc(#loc540)
+    %238 = "ttnn.conv2d"(%236, %arg118, %237, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 128 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x56x56x256xf32, #layout42>, tensor<128x256x1x1xf32, #layout12>, tensor<1x56x56x128xf32, #layout46>, !tt.device<#device>) -> tensor<1x56x56x128xf32, #layout46> loc(#loc540)
+    %239 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x128x56>}> : (!tt.device<#device>) -> tensor<1x56x128x56xf32, #layout47> loc(#loc541)
+    %240 = "ttnn.transpose"(%238, %239) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x56x128xf32, #layout46>, tensor<1x56x128x56xf32, #layout47>) -> tensor<1x56x128x56xf32, #layout47> loc(#loc541)
+    %241 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x56x56>}> : (!tt.device<#device>) -> tensor<1x128x56x56xf32, #layout48> loc(#loc542)
+    %242 = "ttnn.transpose"(%240, %241) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x56x128x56xf32, #layout47>, tensor<1x128x56x56xf32, #layout48>) -> tensor<1x128x56x56xf32, #layout48> loc(#loc542)
+    %243 = "ttnn.to_layout"(%arg23, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc543)
+    %244 = "ttnn.to_device"(%243, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc543)
+    %245 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x56x56>}> : (!tt.device<#device>) -> tensor<1x128x56x56xf32, #layout48> loc(#loc543)
+    %246 = "ttnn.multiply"(%242, %244, %245) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x56x56xf32, #layout48>, tensor<128x1x1xf32, #layout49>, tensor<1x128x56x56xf32, #layout48>) -> tensor<1x128x56x56xf32, #layout48> loc(#loc543)
+    %247 = "ttnn.to_layout"(%arg24, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc544)
+    %248 = "ttnn.to_device"(%247, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc544)
+    %249 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x56x56>}> : (!tt.device<#device>) -> tensor<1x128x56x56xf32, #layout48> loc(#loc544)
+    %250 = "ttnn.add"(%246, %248, %249) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x56x56xf32, #layout48>, tensor<128x1x1xf32, #layout49>, tensor<1x128x56x56xf32, #layout48>) -> tensor<1x128x56x56xf32, #layout48> loc(#loc544)
+    %251 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x56x56>}> : (!tt.device<#device>) -> tensor<1x128x56x56xf32, #layout48> loc(#loc545)
+    %252 = "ttnn.relu"(%250, %251) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x128x56x56xf32, #layout48>, tensor<1x128x56x56xf32, #layout48>) -> tensor<1x128x56x56xf32, #layout48> loc(#loc545)
+    %253 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x128x56>}> : (!tt.device<#device>) -> tensor<1x56x128x56xf32, #layout47> loc(#loc546)
+    %254 = "ttnn.transpose"(%252, %253) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x128x56x56xf32, #layout48>, tensor<1x56x128x56xf32, #layout47>) -> tensor<1x56x128x56xf32, #layout47> loc(#loc546)
+    %255 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x128>}> : (!tt.device<#device>) -> tensor<1x56x56x128xf32, #layout46> loc(#loc547)
+    %256 = "ttnn.transpose"(%254, %255) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x128x56xf32, #layout47>, tensor<1x56x56x128xf32, #layout46>) -> tensor<1x56x56x128xf32, #layout46> loc(#loc547)
+    %257 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc548)
+    %258 = "ttnn.conv2d"(%256, %arg119, %257, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 128 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 128 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 2 : i32, stride_width = 2 : i32}> : (tensor<1x56x56x128xf32, #layout46>, tensor<128x128x3x3xf32, #layout13>, tensor<1x28x28x128xf32, #layout50>, !tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc548)
+    %259 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc549)
+    %260 = "ttnn.transpose"(%258, %259) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc549)
+    %261 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc550)
+    %262 = "ttnn.transpose"(%260, %261) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc550)
+    %263 = "ttnn.to_layout"(%arg25, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc551)
+    %264 = "ttnn.to_device"(%263, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc551)
+    %265 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc551)
+    %266 = "ttnn.multiply"(%262, %264, %265) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc551)
+    %267 = "ttnn.to_layout"(%arg26, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc552)
+    %268 = "ttnn.to_device"(%267, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc552)
+    %269 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc552)
+    %270 = "ttnn.add"(%266, %268, %269) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc552)
+    %271 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc553)
+    %272 = "ttnn.relu"(%270, %271) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc553)
+    %273 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc554)
+    %274 = "ttnn.transpose"(%272, %273) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc554)
+    %275 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc555)
+    %276 = "ttnn.transpose"(%274, %275) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x28x28x128xf32, #layout50>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc555)
+    %277 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x512>}> : (!tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc556)
+    %278 = "ttnn.conv2d"(%276, %arg120, %277, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 128 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 512 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<512x128x1x1xf32, #layout14>, tensor<1x28x28x512xf32, #layout53>, !tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc556)
+    %279 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x512x28>}> : (!tt.device<#device>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc557)
+    %280 = "ttnn.transpose"(%278, %279) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x512xf32, #layout53>, tensor<1x28x512x28xf32, #layout54>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc557)
+    %281 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc558)
+    %282 = "ttnn.transpose"(%280, %281) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x512x28xf32, #layout54>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc558)
+    %283 = "ttnn.to_layout"(%arg27, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc559)
+    %284 = "ttnn.to_device"(%283, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc559)
+    %285 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc559)
+    %286 = "ttnn.multiply"(%282, %284, %285) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<512x1x1xf32, #layout56>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc559)
+    %287 = "ttnn.to_layout"(%arg28, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc560)
+    %288 = "ttnn.to_device"(%287, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc560)
+    %289 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc560)
+    %290 = "ttnn.add"(%286, %288, %289) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<512x1x1xf32, #layout56>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc560)
+    %291 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x256x56>}> : (!tt.device<#device>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc561)
+    %292 = "ttnn.transpose"(%232, %291) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x56x56xf32, #layout44>, tensor<1x56x256x56xf32, #layout43>) -> tensor<1x56x256x56xf32, #layout43> loc(#loc561)
+    %293 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x56x56x256>}> : (!tt.device<#device>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc562)
+    %294 = "ttnn.transpose"(%292, %293) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x56x256x56xf32, #layout43>, tensor<1x56x56x256xf32, #layout42>) -> tensor<1x56x56x256xf32, #layout42> loc(#loc562)
+    %295 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x512>}> : (!tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc563)
+    %296 = "ttnn.conv2d"(%294, %arg121, %295, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 56 : i32, input_width = 56 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 512 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 2 : i32, stride_width = 2 : i32}> : (tensor<1x56x56x256xf32, #layout42>, tensor<512x256x1x1xf32, #layout15>, tensor<1x28x28x512xf32, #layout53>, !tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc563)
+    %297 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x512x28>}> : (!tt.device<#device>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc564)
+    %298 = "ttnn.transpose"(%296, %297) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x512xf32, #layout53>, tensor<1x28x512x28xf32, #layout54>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc564)
+    %299 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc565)
+    %300 = "ttnn.transpose"(%298, %299) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x512x28xf32, #layout54>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc565)
+    %301 = "ttnn.to_layout"(%arg29, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc566)
+    %302 = "ttnn.to_device"(%301, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc566)
+    %303 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc566)
+    %304 = "ttnn.multiply"(%300, %302, %303) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<512x1x1xf32, #layout56>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc566)
+    %305 = "ttnn.to_layout"(%arg30, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc567)
+    %306 = "ttnn.to_device"(%305, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc567)
+    %307 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc567)
+    %308 = "ttnn.add"(%304, %306, %307) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<512x1x1xf32, #layout56>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc567)
+    %309 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc568)
+    %310 = "ttnn.add"(%290, %308, %309) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc568)
+    %311 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc569)
+    %312 = "ttnn.relu"(%310, %311) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc569)
+    %313 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x512x28>}> : (!tt.device<#device>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc570)
+    %314 = "ttnn.transpose"(%312, %313) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x28x512x28xf32, #layout54>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc570)
+    %315 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x512>}> : (!tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc571)
+    %316 = "ttnn.transpose"(%314, %315) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x512x28xf32, #layout54>, tensor<1x28x28x512xf32, #layout53>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc571)
+    %317 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc572)
+    %318 = "ttnn.conv2d"(%316, %arg122, %317, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 128 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x512xf32, #layout53>, tensor<128x512x1x1xf32, #layout16>, tensor<1x28x28x128xf32, #layout50>, !tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc572)
+    %319 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc573)
+    %320 = "ttnn.transpose"(%318, %319) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc573)
+    %321 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc574)
+    %322 = "ttnn.transpose"(%320, %321) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc574)
+    %323 = "ttnn.to_layout"(%arg31, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc575)
+    %324 = "ttnn.to_device"(%323, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc575)
+    %325 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc575)
+    %326 = "ttnn.multiply"(%322, %324, %325) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc575)
+    %327 = "ttnn.to_layout"(%arg32, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc576)
+    %328 = "ttnn.to_device"(%327, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc576)
+    %329 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc576)
+    %330 = "ttnn.add"(%326, %328, %329) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc576)
+    %331 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc577)
+    %332 = "ttnn.relu"(%330, %331) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc577)
+    %333 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc578)
+    %334 = "ttnn.transpose"(%332, %333) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc578)
+    %335 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc579)
+    %336 = "ttnn.transpose"(%334, %335) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x28x28x128xf32, #layout50>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc579)
+    %337 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc580)
+    %338 = "ttnn.conv2d"(%336, %arg123, %337, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 128 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 128 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<128x128x3x3xf32, #layout13>, tensor<1x28x28x128xf32, #layout50>, !tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc580)
+    %339 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc581)
+    %340 = "ttnn.transpose"(%338, %339) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc581)
+    %341 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc582)
+    %342 = "ttnn.transpose"(%340, %341) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc582)
+    %343 = "ttnn.to_layout"(%arg33, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc583)
+    %344 = "ttnn.to_device"(%343, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc583)
+    %345 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc583)
+    %346 = "ttnn.multiply"(%342, %344, %345) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc583)
+    %347 = "ttnn.to_layout"(%arg34, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc584)
+    %348 = "ttnn.to_device"(%347, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc584)
+    %349 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc584)
+    %350 = "ttnn.add"(%346, %348, %349) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc584)
+    %351 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc585)
+    %352 = "ttnn.relu"(%350, %351) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc585)
+    %353 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc586)
+    %354 = "ttnn.transpose"(%352, %353) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc586)
+    %355 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc587)
+    %356 = "ttnn.transpose"(%354, %355) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x28x28x128xf32, #layout50>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc587)
+    %357 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x512>}> : (!tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc588)
+    %358 = "ttnn.conv2d"(%356, %arg124, %357, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 128 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 512 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<512x128x1x1xf32, #layout14>, tensor<1x28x28x512xf32, #layout53>, !tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc588)
+    %359 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x512x28>}> : (!tt.device<#device>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc589)
+    %360 = "ttnn.transpose"(%358, %359) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x512xf32, #layout53>, tensor<1x28x512x28xf32, #layout54>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc589)
+    %361 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc590)
+    %362 = "ttnn.transpose"(%360, %361) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x512x28xf32, #layout54>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc590)
+    %363 = "ttnn.to_layout"(%arg35, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc591)
+    %364 = "ttnn.to_device"(%363, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc591)
+    %365 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc591)
+    %366 = "ttnn.multiply"(%362, %364, %365) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<512x1x1xf32, #layout56>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc591)
+    %367 = "ttnn.to_layout"(%arg36, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc592)
+    %368 = "ttnn.to_device"(%367, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc592)
+    %369 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc592)
+    %370 = "ttnn.add"(%366, %368, %369) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<512x1x1xf32, #layout56>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc592)
+    %371 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc593)
+    %372 = "ttnn.add"(%370, %312, %371) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc593)
+    %373 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc594)
+    %374 = "ttnn.relu"(%372, %373) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc594)
+    %375 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x512x28>}> : (!tt.device<#device>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc595)
+    %376 = "ttnn.transpose"(%374, %375) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x28x512x28xf32, #layout54>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc595)
+    %377 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x512>}> : (!tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc596)
+    %378 = "ttnn.transpose"(%376, %377) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x512x28xf32, #layout54>, tensor<1x28x28x512xf32, #layout53>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc596)
+    %379 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc597)
+    %380 = "ttnn.conv2d"(%378, %arg125, %379, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 128 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x512xf32, #layout53>, tensor<128x512x1x1xf32, #layout16>, tensor<1x28x28x128xf32, #layout50>, !tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc597)
+    %381 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc598)
+    %382 = "ttnn.transpose"(%380, %381) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc598)
+    %383 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc599)
+    %384 = "ttnn.transpose"(%382, %383) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc599)
+    %385 = "ttnn.to_layout"(%arg37, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc600)
+    %386 = "ttnn.to_device"(%385, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc600)
+    %387 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc600)
+    %388 = "ttnn.multiply"(%384, %386, %387) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc600)
+    %389 = "ttnn.to_layout"(%arg38, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc601)
+    %390 = "ttnn.to_device"(%389, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc601)
+    %391 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc601)
+    %392 = "ttnn.add"(%388, %390, %391) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc601)
+    %393 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc602)
+    %394 = "ttnn.relu"(%392, %393) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc602)
+    %395 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc603)
+    %396 = "ttnn.transpose"(%394, %395) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc603)
+    %397 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc604)
+    %398 = "ttnn.transpose"(%396, %397) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x28x28x128xf32, #layout50>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc604)
+    %399 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc605)
+    %400 = "ttnn.conv2d"(%398, %arg126, %399, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 128 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 128 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<128x128x3x3xf32, #layout13>, tensor<1x28x28x128xf32, #layout50>, !tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc605)
+    %401 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc606)
+    %402 = "ttnn.transpose"(%400, %401) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc606)
+    %403 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc607)
+    %404 = "ttnn.transpose"(%402, %403) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc607)
+    %405 = "ttnn.to_layout"(%arg39, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc608)
+    %406 = "ttnn.to_device"(%405, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc608)
+    %407 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc608)
+    %408 = "ttnn.multiply"(%404, %406, %407) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc608)
+    %409 = "ttnn.to_layout"(%arg40, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc609)
+    %410 = "ttnn.to_device"(%409, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc609)
+    %411 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc609)
+    %412 = "ttnn.add"(%408, %410, %411) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc609)
+    %413 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc610)
+    %414 = "ttnn.relu"(%412, %413) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc610)
+    %415 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc611)
+    %416 = "ttnn.transpose"(%414, %415) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc611)
+    %417 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc612)
+    %418 = "ttnn.transpose"(%416, %417) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x28x28x128xf32, #layout50>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc612)
+    %419 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x512>}> : (!tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc613)
+    %420 = "ttnn.conv2d"(%418, %arg127, %419, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 128 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 512 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<512x128x1x1xf32, #layout14>, tensor<1x28x28x512xf32, #layout53>, !tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc613)
+    %421 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x512x28>}> : (!tt.device<#device>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc614)
+    %422 = "ttnn.transpose"(%420, %421) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x512xf32, #layout53>, tensor<1x28x512x28xf32, #layout54>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc614)
+    %423 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc615)
+    %424 = "ttnn.transpose"(%422, %423) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x512x28xf32, #layout54>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc615)
+    %425 = "ttnn.to_layout"(%arg41, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc616)
+    %426 = "ttnn.to_device"(%425, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc616)
+    %427 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc616)
+    %428 = "ttnn.multiply"(%424, %426, %427) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<512x1x1xf32, #layout56>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc616)
+    %429 = "ttnn.to_layout"(%arg42, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc617)
+    %430 = "ttnn.to_device"(%429, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc617)
+    %431 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc617)
+    %432 = "ttnn.add"(%428, %430, %431) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<512x1x1xf32, #layout56>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc617)
+    %433 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc618)
+    %434 = "ttnn.add"(%432, %374, %433) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc618)
+    %435 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc619)
+    %436 = "ttnn.relu"(%434, %435) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc619)
+    %437 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x512x28>}> : (!tt.device<#device>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc620)
+    %438 = "ttnn.transpose"(%436, %437) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x28x512x28xf32, #layout54>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc620)
+    %439 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x512>}> : (!tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc621)
+    %440 = "ttnn.transpose"(%438, %439) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x512x28xf32, #layout54>, tensor<1x28x28x512xf32, #layout53>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc621)
+    %441 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc622)
+    %442 = "ttnn.conv2d"(%440, %arg128, %441, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 128 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x512xf32, #layout53>, tensor<128x512x1x1xf32, #layout16>, tensor<1x28x28x128xf32, #layout50>, !tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc622)
+    %443 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc623)
+    %444 = "ttnn.transpose"(%442, %443) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc623)
+    %445 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc624)
+    %446 = "ttnn.transpose"(%444, %445) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc624)
+    %447 = "ttnn.to_layout"(%arg43, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc625)
+    %448 = "ttnn.to_device"(%447, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc625)
+    %449 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc625)
+    %450 = "ttnn.multiply"(%446, %448, %449) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc625)
+    %451 = "ttnn.to_layout"(%arg44, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc626)
+    %452 = "ttnn.to_device"(%451, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc626)
+    %453 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc626)
+    %454 = "ttnn.add"(%450, %452, %453) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc626)
+    %455 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc627)
+    %456 = "ttnn.relu"(%454, %455) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc627)
+    %457 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc628)
+    %458 = "ttnn.transpose"(%456, %457) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc628)
+    %459 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc629)
+    %460 = "ttnn.transpose"(%458, %459) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x28x28x128xf32, #layout50>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc629)
+    %461 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc630)
+    %462 = "ttnn.conv2d"(%460, %arg129, %461, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 128 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 128 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<128x128x3x3xf32, #layout13>, tensor<1x28x28x128xf32, #layout50>, !tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc630)
+    %463 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc631)
+    %464 = "ttnn.transpose"(%462, %463) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc631)
+    %465 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc632)
+    %466 = "ttnn.transpose"(%464, %465) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc632)
+    %467 = "ttnn.to_layout"(%arg45, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc633)
+    %468 = "ttnn.to_device"(%467, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc633)
+    %469 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc633)
+    %470 = "ttnn.multiply"(%466, %468, %469) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc633)
+    %471 = "ttnn.to_layout"(%arg46, %0) <{layout = #ttnn.layout<tile>}> : (tensor<128x1x1xf32, #layout3>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc634)
+    %472 = "ttnn.to_device"(%471, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<128x1x1xf32, #layout49>, !tt.device<#device>) -> tensor<128x1x1xf32, #layout49> loc(#loc634)
+    %473 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc634)
+    %474 = "ttnn.add"(%470, %472, %473) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<128x1x1xf32, #layout49>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc634)
+    %475 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x128x28x28>}> : (!tt.device<#device>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc635)
+    %476 = "ttnn.relu"(%474, %475) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x128x28x28xf32, #layout52>) -> tensor<1x128x28x28xf32, #layout52> loc(#loc635)
+    %477 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x128x28>}> : (!tt.device<#device>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc636)
+    %478 = "ttnn.transpose"(%476, %477) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x128x28x28xf32, #layout52>, tensor<1x28x128x28xf32, #layout51>) -> tensor<1x28x128x28xf32, #layout51> loc(#loc636)
+    %479 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x128>}> : (!tt.device<#device>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc637)
+    %480 = "ttnn.transpose"(%478, %479) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x128x28xf32, #layout51>, tensor<1x28x28x128xf32, #layout50>) -> tensor<1x28x28x128xf32, #layout50> loc(#loc637)
+    %481 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x512>}> : (!tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc638)
+    %482 = "ttnn.conv2d"(%480, %arg130, %481, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 128 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 512 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x128xf32, #layout50>, tensor<512x128x1x1xf32, #layout14>, tensor<1x28x28x512xf32, #layout53>, !tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc638)
+    %483 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x512x28>}> : (!tt.device<#device>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc639)
+    %484 = "ttnn.transpose"(%482, %483) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x512xf32, #layout53>, tensor<1x28x512x28xf32, #layout54>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc639)
+    %485 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc640)
+    %486 = "ttnn.transpose"(%484, %485) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x512x28xf32, #layout54>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc640)
+    %487 = "ttnn.to_layout"(%arg47, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc641)
+    %488 = "ttnn.to_device"(%487, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc641)
+    %489 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc641)
+    %490 = "ttnn.multiply"(%486, %488, %489) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<512x1x1xf32, #layout56>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc641)
+    %491 = "ttnn.to_layout"(%arg48, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc642)
+    %492 = "ttnn.to_device"(%491, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc642)
+    %493 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc642)
+    %494 = "ttnn.add"(%490, %492, %493) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<512x1x1xf32, #layout56>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc642)
+    %495 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc643)
+    %496 = "ttnn.add"(%494, %436, %495) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc643)
+    %497 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x28x28>}> : (!tt.device<#device>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc644)
+    %498 = "ttnn.relu"(%496, %497) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x512x28x28xf32, #layout55>) -> tensor<1x512x28x28xf32, #layout55> loc(#loc644)
+    %499 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x512x28>}> : (!tt.device<#device>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc645)
+    %500 = "ttnn.transpose"(%498, %499) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x28x512x28xf32, #layout54>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc645)
+    %501 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x512>}> : (!tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc646)
+    %502 = "ttnn.transpose"(%500, %501) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x512x28xf32, #layout54>, tensor<1x28x28x512xf32, #layout53>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc646)
+    %503 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x256>}> : (!tt.device<#device>) -> tensor<1x28x28x256xf32, #layout57> loc(#loc647)
+    %504 = "ttnn.conv2d"(%502, %arg131, %503, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 256 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x28x28x512xf32, #layout53>, tensor<256x512x1x1xf32, #layout17>, tensor<1x28x28x256xf32, #layout57>, !tt.device<#device>) -> tensor<1x28x28x256xf32, #layout57> loc(#loc647)
+    %505 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x256x28>}> : (!tt.device<#device>) -> tensor<1x28x256x28xf32, #layout58> loc(#loc648)
+    %506 = "ttnn.transpose"(%504, %505) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x28x256xf32, #layout57>, tensor<1x28x256x28xf32, #layout58>) -> tensor<1x28x256x28xf32, #layout58> loc(#loc648)
+    %507 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x28x28>}> : (!tt.device<#device>) -> tensor<1x256x28x28xf32, #layout59> loc(#loc649)
+    %508 = "ttnn.transpose"(%506, %507) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x28x256x28xf32, #layout58>, tensor<1x256x28x28xf32, #layout59>) -> tensor<1x256x28x28xf32, #layout59> loc(#loc649)
+    %509 = "ttnn.to_layout"(%arg49, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc650)
+    %510 = "ttnn.to_device"(%509, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc650)
+    %511 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x28x28>}> : (!tt.device<#device>) -> tensor<1x256x28x28xf32, #layout59> loc(#loc650)
+    %512 = "ttnn.multiply"(%508, %510, %511) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x28x28xf32, #layout59>, tensor<256x1x1xf32, #layout45>, tensor<1x256x28x28xf32, #layout59>) -> tensor<1x256x28x28xf32, #layout59> loc(#loc650)
+    %513 = "ttnn.to_layout"(%arg50, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc651)
+    %514 = "ttnn.to_device"(%513, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc651)
+    %515 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x28x28>}> : (!tt.device<#device>) -> tensor<1x256x28x28xf32, #layout59> loc(#loc651)
+    %516 = "ttnn.add"(%512, %514, %515) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x28x28xf32, #layout59>, tensor<256x1x1xf32, #layout45>, tensor<1x256x28x28xf32, #layout59>) -> tensor<1x256x28x28xf32, #layout59> loc(#loc651)
+    %517 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x28x28>}> : (!tt.device<#device>) -> tensor<1x256x28x28xf32, #layout59> loc(#loc652)
+    %518 = "ttnn.relu"(%516, %517) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x28x28xf32, #layout59>, tensor<1x256x28x28xf32, #layout59>) -> tensor<1x256x28x28xf32, #layout59> loc(#loc652)
+    %519 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x256x28>}> : (!tt.device<#device>) -> tensor<1x28x256x28xf32, #layout58> loc(#loc653)
+    %520 = "ttnn.transpose"(%518, %519) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x28x28xf32, #layout59>, tensor<1x28x256x28xf32, #layout58>) -> tensor<1x28x256x28xf32, #layout58> loc(#loc653)
+    %521 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x256>}> : (!tt.device<#device>) -> tensor<1x28x28x256xf32, #layout57> loc(#loc654)
+    %522 = "ttnn.transpose"(%520, %521) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x256x28xf32, #layout58>, tensor<1x28x28x256xf32, #layout57>) -> tensor<1x28x28x256xf32, #layout57> loc(#loc654)
+    %523 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc655)
+    %524 = "ttnn.conv2d"(%522, %arg132, %523, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 256 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 2 : i32, stride_width = 2 : i32}> : (tensor<1x28x28x256xf32, #layout57>, tensor<256x256x3x3xf32, #layout18>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc655)
+    %525 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc656)
+    %526 = "ttnn.transpose"(%524, %525) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc656)
+    %527 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc657)
+    %528 = "ttnn.transpose"(%526, %527) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc657)
+    %529 = "ttnn.to_layout"(%arg51, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc658)
+    %530 = "ttnn.to_device"(%529, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc658)
+    %531 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc658)
+    %532 = "ttnn.multiply"(%528, %530, %531) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc658)
+    %533 = "ttnn.to_layout"(%arg52, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc659)
+    %534 = "ttnn.to_device"(%533, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc659)
+    %535 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc659)
+    %536 = "ttnn.add"(%532, %534, %535) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc659)
+    %537 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc660)
+    %538 = "ttnn.relu"(%536, %537) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc660)
+    %539 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc661)
+    %540 = "ttnn.transpose"(%538, %539) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc661)
+    %541 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc662)
+    %542 = "ttnn.transpose"(%540, %541) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc662)
+    %543 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc663)
+    %544 = "ttnn.conv2d"(%542, %arg133, %543, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 1024 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1024x256x1x1xf32, #layout19>, tensor<1x14x14x1024xf32, #layout63>, !tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc663)
+    %545 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc664)
+    %546 = "ttnn.transpose"(%544, %545) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc664)
+    %547 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc665)
+    %548 = "ttnn.transpose"(%546, %547) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc665)
+    %549 = "ttnn.to_layout"(%arg53, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc666)
+    %550 = "ttnn.to_device"(%549, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc666)
+    %551 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc666)
+    %552 = "ttnn.multiply"(%548, %550, %551) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc666)
+    %553 = "ttnn.to_layout"(%arg54, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc667)
+    %554 = "ttnn.to_device"(%553, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc667)
+    %555 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc667)
+    %556 = "ttnn.add"(%552, %554, %555) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc667)
+    %557 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x512x28>}> : (!tt.device<#device>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc668)
+    %558 = "ttnn.transpose"(%498, %557) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x28x28xf32, #layout55>, tensor<1x28x512x28xf32, #layout54>) -> tensor<1x28x512x28xf32, #layout54> loc(#loc668)
+    %559 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x28x28x512>}> : (!tt.device<#device>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc669)
+    %560 = "ttnn.transpose"(%558, %559) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x28x512x28xf32, #layout54>, tensor<1x28x28x512xf32, #layout53>) -> tensor<1x28x28x512xf32, #layout53> loc(#loc669)
+    %561 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc670)
+    %562 = "ttnn.conv2d"(%560, %arg134, %561, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 28 : i32, input_width = 28 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 1024 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 2 : i32, stride_width = 2 : i32}> : (tensor<1x28x28x512xf32, #layout53>, tensor<1024x512x1x1xf32, #layout20>, tensor<1x14x14x1024xf32, #layout63>, !tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc670)
+    %563 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc671)
+    %564 = "ttnn.transpose"(%562, %563) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc671)
+    %565 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc672)
+    %566 = "ttnn.transpose"(%564, %565) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc672)
+    %567 = "ttnn.to_layout"(%arg55, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc673)
+    %568 = "ttnn.to_device"(%567, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc673)
+    %569 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc673)
+    %570 = "ttnn.multiply"(%566, %568, %569) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc673)
+    %571 = "ttnn.to_layout"(%arg56, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc674)
+    %572 = "ttnn.to_device"(%571, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc674)
+    %573 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc674)
+    %574 = "ttnn.add"(%570, %572, %573) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc674)
+    %575 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc675)
+    %576 = "ttnn.add"(%556, %574, %575) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc675)
+    %577 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc676)
+    %578 = "ttnn.relu"(%576, %577) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc676)
+    %579 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc677)
+    %580 = "ttnn.transpose"(%578, %579) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc677)
+    %581 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc678)
+    %582 = "ttnn.transpose"(%580, %581) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x14x14x1024xf32, #layout63>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc678)
+    %583 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc679)
+    %584 = "ttnn.conv2d"(%582, %arg135, %583, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 1024 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 256 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<256x1024x1x1xf32, #layout21>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc679)
+    %585 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc680)
+    %586 = "ttnn.transpose"(%584, %585) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc680)
+    %587 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc681)
+    %588 = "ttnn.transpose"(%586, %587) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc681)
+    %589 = "ttnn.to_layout"(%arg57, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc682)
+    %590 = "ttnn.to_device"(%589, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc682)
+    %591 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc682)
+    %592 = "ttnn.multiply"(%588, %590, %591) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc682)
+    %593 = "ttnn.to_layout"(%arg58, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc683)
+    %594 = "ttnn.to_device"(%593, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc683)
+    %595 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc683)
+    %596 = "ttnn.add"(%592, %594, %595) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc683)
+    %597 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc684)
+    %598 = "ttnn.relu"(%596, %597) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc684)
+    %599 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc685)
+    %600 = "ttnn.transpose"(%598, %599) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc685)
+    %601 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc686)
+    %602 = "ttnn.transpose"(%600, %601) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc686)
+    %603 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc687)
+    %604 = "ttnn.conv2d"(%602, %arg136, %603, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 256 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<256x256x3x3xf32, #layout18>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc687)
+    %605 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc688)
+    %606 = "ttnn.transpose"(%604, %605) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc688)
+    %607 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc689)
+    %608 = "ttnn.transpose"(%606, %607) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc689)
+    %609 = "ttnn.to_layout"(%arg59, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc690)
+    %610 = "ttnn.to_device"(%609, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc690)
+    %611 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc690)
+    %612 = "ttnn.multiply"(%608, %610, %611) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc690)
+    %613 = "ttnn.to_layout"(%arg60, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc691)
+    %614 = "ttnn.to_device"(%613, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc691)
+    %615 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc691)
+    %616 = "ttnn.add"(%612, %614, %615) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc691)
+    %617 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc692)
+    %618 = "ttnn.relu"(%616, %617) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc692)
+    %619 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc693)
+    %620 = "ttnn.transpose"(%618, %619) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc693)
+    %621 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc694)
+    %622 = "ttnn.transpose"(%620, %621) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc694)
+    %623 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc695)
+    %624 = "ttnn.conv2d"(%622, %arg137, %623, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 1024 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1024x256x1x1xf32, #layout19>, tensor<1x14x14x1024xf32, #layout63>, !tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc695)
+    %625 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc696)
+    %626 = "ttnn.transpose"(%624, %625) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc696)
+    %627 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc697)
+    %628 = "ttnn.transpose"(%626, %627) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc697)
+    %629 = "ttnn.to_layout"(%arg61, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc698)
+    %630 = "ttnn.to_device"(%629, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc698)
+    %631 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc698)
+    %632 = "ttnn.multiply"(%628, %630, %631) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc698)
+    %633 = "ttnn.to_layout"(%arg62, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc699)
+    %634 = "ttnn.to_device"(%633, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc699)
+    %635 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc699)
+    %636 = "ttnn.add"(%632, %634, %635) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc699)
+    %637 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc700)
+    %638 = "ttnn.add"(%636, %578, %637) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc700)
+    %639 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc701)
+    %640 = "ttnn.relu"(%638, %639) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc701)
+    %641 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc702)
+    %642 = "ttnn.transpose"(%640, %641) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc702)
+    %643 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc703)
+    %644 = "ttnn.transpose"(%642, %643) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x14x14x1024xf32, #layout63>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc703)
+    %645 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc704)
+    %646 = "ttnn.conv2d"(%644, %arg138, %645, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 1024 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 256 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<256x1024x1x1xf32, #layout21>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc704)
+    %647 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc705)
+    %648 = "ttnn.transpose"(%646, %647) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc705)
+    %649 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc706)
+    %650 = "ttnn.transpose"(%648, %649) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc706)
+    %651 = "ttnn.to_layout"(%arg63, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc707)
+    %652 = "ttnn.to_device"(%651, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc707)
+    %653 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc707)
+    %654 = "ttnn.multiply"(%650, %652, %653) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc707)
+    %655 = "ttnn.to_layout"(%arg64, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc708)
+    %656 = "ttnn.to_device"(%655, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc708)
+    %657 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc708)
+    %658 = "ttnn.add"(%654, %656, %657) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc708)
+    %659 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc709)
+    %660 = "ttnn.relu"(%658, %659) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc709)
+    %661 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc710)
+    %662 = "ttnn.transpose"(%660, %661) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc710)
+    %663 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc711)
+    %664 = "ttnn.transpose"(%662, %663) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc711)
+    %665 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc712)
+    %666 = "ttnn.conv2d"(%664, %arg139, %665, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 256 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<256x256x3x3xf32, #layout18>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc712)
+    %667 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc713)
+    %668 = "ttnn.transpose"(%666, %667) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc713)
+    %669 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc714)
+    %670 = "ttnn.transpose"(%668, %669) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc714)
+    %671 = "ttnn.to_layout"(%arg65, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc715)
+    %672 = "ttnn.to_device"(%671, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc715)
+    %673 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc715)
+    %674 = "ttnn.multiply"(%670, %672, %673) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc715)
+    %675 = "ttnn.to_layout"(%arg66, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc716)
+    %676 = "ttnn.to_device"(%675, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc716)
+    %677 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc716)
+    %678 = "ttnn.add"(%674, %676, %677) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc716)
+    %679 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc717)
+    %680 = "ttnn.relu"(%678, %679) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc717)
+    %681 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc718)
+    %682 = "ttnn.transpose"(%680, %681) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc718)
+    %683 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc719)
+    %684 = "ttnn.transpose"(%682, %683) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc719)
+    %685 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc720)
+    %686 = "ttnn.conv2d"(%684, %arg140, %685, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 1024 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1024x256x1x1xf32, #layout19>, tensor<1x14x14x1024xf32, #layout63>, !tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc720)
+    %687 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc721)
+    %688 = "ttnn.transpose"(%686, %687) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc721)
+    %689 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc722)
+    %690 = "ttnn.transpose"(%688, %689) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc722)
+    %691 = "ttnn.to_layout"(%arg67, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc723)
+    %692 = "ttnn.to_device"(%691, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc723)
+    %693 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc723)
+    %694 = "ttnn.multiply"(%690, %692, %693) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc723)
+    %695 = "ttnn.to_layout"(%arg68, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc724)
+    %696 = "ttnn.to_device"(%695, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc724)
+    %697 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc724)
+    %698 = "ttnn.add"(%694, %696, %697) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc724)
+    %699 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc725)
+    %700 = "ttnn.add"(%698, %640, %699) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc725)
+    %701 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc726)
+    %702 = "ttnn.relu"(%700, %701) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc726)
+    %703 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc727)
+    %704 = "ttnn.transpose"(%702, %703) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc727)
+    %705 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc728)
+    %706 = "ttnn.transpose"(%704, %705) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x14x14x1024xf32, #layout63>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc728)
+    %707 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc729)
+    %708 = "ttnn.conv2d"(%706, %arg141, %707, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 1024 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 256 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<256x1024x1x1xf32, #layout21>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc729)
+    %709 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc730)
+    %710 = "ttnn.transpose"(%708, %709) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc730)
+    %711 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc731)
+    %712 = "ttnn.transpose"(%710, %711) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc731)
+    %713 = "ttnn.to_layout"(%arg69, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc732)
+    %714 = "ttnn.to_device"(%713, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc732)
+    %715 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc732)
+    %716 = "ttnn.multiply"(%712, %714, %715) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc732)
+    %717 = "ttnn.to_layout"(%arg70, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc733)
+    %718 = "ttnn.to_device"(%717, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc733)
+    %719 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc733)
+    %720 = "ttnn.add"(%716, %718, %719) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc733)
+    %721 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc734)
+    %722 = "ttnn.relu"(%720, %721) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc734)
+    %723 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc735)
+    %724 = "ttnn.transpose"(%722, %723) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc735)
+    %725 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc736)
+    %726 = "ttnn.transpose"(%724, %725) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc736)
+    %727 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc737)
+    %728 = "ttnn.conv2d"(%726, %arg142, %727, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 256 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<256x256x3x3xf32, #layout18>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc737)
+    %729 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc738)
+    %730 = "ttnn.transpose"(%728, %729) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc738)
+    %731 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc739)
+    %732 = "ttnn.transpose"(%730, %731) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc739)
+    %733 = "ttnn.to_layout"(%arg71, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc740)
+    %734 = "ttnn.to_device"(%733, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc740)
+    %735 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc740)
+    %736 = "ttnn.multiply"(%732, %734, %735) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc740)
+    %737 = "ttnn.to_layout"(%arg72, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc741)
+    %738 = "ttnn.to_device"(%737, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc741)
+    %739 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc741)
+    %740 = "ttnn.add"(%736, %738, %739) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc741)
+    %741 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc742)
+    %742 = "ttnn.relu"(%740, %741) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc742)
+    %743 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc743)
+    %744 = "ttnn.transpose"(%742, %743) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc743)
+    %745 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc744)
+    %746 = "ttnn.transpose"(%744, %745) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc744)
+    %747 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc745)
+    %748 = "ttnn.conv2d"(%746, %arg143, %747, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 1024 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1024x256x1x1xf32, #layout19>, tensor<1x14x14x1024xf32, #layout63>, !tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc745)
+    %749 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc746)
+    %750 = "ttnn.transpose"(%748, %749) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc746)
+    %751 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc747)
+    %752 = "ttnn.transpose"(%750, %751) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc747)
+    %753 = "ttnn.to_layout"(%arg73, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc748)
+    %754 = "ttnn.to_device"(%753, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc748)
+    %755 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc748)
+    %756 = "ttnn.multiply"(%752, %754, %755) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc748)
+    %757 = "ttnn.to_layout"(%arg74, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc749)
+    %758 = "ttnn.to_device"(%757, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc749)
+    %759 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc749)
+    %760 = "ttnn.add"(%756, %758, %759) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc749)
+    %761 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc750)
+    %762 = "ttnn.add"(%760, %702, %761) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc750)
+    %763 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc751)
+    %764 = "ttnn.relu"(%762, %763) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc751)
+    %765 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc752)
+    %766 = "ttnn.transpose"(%764, %765) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc752)
+    %767 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc753)
+    %768 = "ttnn.transpose"(%766, %767) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x14x14x1024xf32, #layout63>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc753)
+    %769 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc754)
+    %770 = "ttnn.conv2d"(%768, %arg144, %769, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 1024 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 256 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<256x1024x1x1xf32, #layout21>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc754)
+    %771 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc755)
+    %772 = "ttnn.transpose"(%770, %771) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc755)
+    %773 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc756)
+    %774 = "ttnn.transpose"(%772, %773) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc756)
+    %775 = "ttnn.to_layout"(%arg75, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc757)
+    %776 = "ttnn.to_device"(%775, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc757)
+    %777 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc757)
+    %778 = "ttnn.multiply"(%774, %776, %777) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc757)
+    %779 = "ttnn.to_layout"(%arg76, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc758)
+    %780 = "ttnn.to_device"(%779, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc758)
+    %781 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc758)
+    %782 = "ttnn.add"(%778, %780, %781) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc758)
+    %783 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc759)
+    %784 = "ttnn.relu"(%782, %783) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc759)
+    %785 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc760)
+    %786 = "ttnn.transpose"(%784, %785) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc760)
+    %787 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc761)
+    %788 = "ttnn.transpose"(%786, %787) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc761)
+    %789 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc762)
+    %790 = "ttnn.conv2d"(%788, %arg145, %789, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 256 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<256x256x3x3xf32, #layout18>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc762)
+    %791 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc763)
+    %792 = "ttnn.transpose"(%790, %791) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc763)
+    %793 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc764)
+    %794 = "ttnn.transpose"(%792, %793) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc764)
+    %795 = "ttnn.to_layout"(%arg77, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc765)
+    %796 = "ttnn.to_device"(%795, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc765)
+    %797 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc765)
+    %798 = "ttnn.multiply"(%794, %796, %797) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc765)
+    %799 = "ttnn.to_layout"(%arg78, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc766)
+    %800 = "ttnn.to_device"(%799, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc766)
+    %801 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc766)
+    %802 = "ttnn.add"(%798, %800, %801) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc766)
+    %803 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc767)
+    %804 = "ttnn.relu"(%802, %803) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc767)
+    %805 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc768)
+    %806 = "ttnn.transpose"(%804, %805) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc768)
+    %807 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc769)
+    %808 = "ttnn.transpose"(%806, %807) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc769)
+    %809 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc770)
+    %810 = "ttnn.conv2d"(%808, %arg146, %809, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 1024 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1024x256x1x1xf32, #layout19>, tensor<1x14x14x1024xf32, #layout63>, !tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc770)
+    %811 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc771)
+    %812 = "ttnn.transpose"(%810, %811) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc771)
+    %813 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc772)
+    %814 = "ttnn.transpose"(%812, %813) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc772)
+    %815 = "ttnn.to_layout"(%arg79, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc773)
+    %816 = "ttnn.to_device"(%815, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc773)
+    %817 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc773)
+    %818 = "ttnn.multiply"(%814, %816, %817) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc773)
+    %819 = "ttnn.to_layout"(%arg80, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc774)
+    %820 = "ttnn.to_device"(%819, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc774)
+    %821 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc774)
+    %822 = "ttnn.add"(%818, %820, %821) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc774)
+    %823 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc775)
+    %824 = "ttnn.add"(%822, %764, %823) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc775)
+    %825 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc776)
+    %826 = "ttnn.relu"(%824, %825) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc776)
+    %827 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc777)
+    %828 = "ttnn.transpose"(%826, %827) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc777)
+    %829 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc778)
+    %830 = "ttnn.transpose"(%828, %829) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x14x14x1024xf32, #layout63>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc778)
+    %831 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc779)
+    %832 = "ttnn.conv2d"(%830, %arg147, %831, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 1024 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 256 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<256x1024x1x1xf32, #layout21>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc779)
+    %833 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc780)
+    %834 = "ttnn.transpose"(%832, %833) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc780)
+    %835 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc781)
+    %836 = "ttnn.transpose"(%834, %835) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc781)
+    %837 = "ttnn.to_layout"(%arg81, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc782)
+    %838 = "ttnn.to_device"(%837, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc782)
+    %839 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc782)
+    %840 = "ttnn.multiply"(%836, %838, %839) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc782)
+    %841 = "ttnn.to_layout"(%arg82, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc783)
+    %842 = "ttnn.to_device"(%841, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc783)
+    %843 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc783)
+    %844 = "ttnn.add"(%840, %842, %843) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc783)
+    %845 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc784)
+    %846 = "ttnn.relu"(%844, %845) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc784)
+    %847 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc785)
+    %848 = "ttnn.transpose"(%846, %847) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc785)
+    %849 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc786)
+    %850 = "ttnn.transpose"(%848, %849) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc786)
+    %851 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc787)
+    %852 = "ttnn.conv2d"(%850, %arg148, %851, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 256 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<256x256x3x3xf32, #layout18>, tensor<1x14x14x256xf32, #layout60>, !tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc787)
+    %853 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc788)
+    %854 = "ttnn.transpose"(%852, %853) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc788)
+    %855 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc789)
+    %856 = "ttnn.transpose"(%854, %855) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc789)
+    %857 = "ttnn.to_layout"(%arg83, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc790)
+    %858 = "ttnn.to_device"(%857, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc790)
+    %859 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc790)
+    %860 = "ttnn.multiply"(%856, %858, %859) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc790)
+    %861 = "ttnn.to_layout"(%arg84, %0) <{layout = #ttnn.layout<tile>}> : (tensor<256x1x1xf32, #layout2>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc791)
+    %862 = "ttnn.to_device"(%861, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<256x1x1xf32, #layout45>, !tt.device<#device>) -> tensor<256x1x1xf32, #layout45> loc(#loc791)
+    %863 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc791)
+    %864 = "ttnn.add"(%860, %862, %863) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<256x1x1xf32, #layout45>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc791)
+    %865 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x256x14x14>}> : (!tt.device<#device>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc792)
+    %866 = "ttnn.relu"(%864, %865) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x256x14x14xf32, #layout62>) -> tensor<1x256x14x14xf32, #layout62> loc(#loc792)
+    %867 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x256x14>}> : (!tt.device<#device>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc793)
+    %868 = "ttnn.transpose"(%866, %867) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x256x14x14xf32, #layout62>, tensor<1x14x256x14xf32, #layout61>) -> tensor<1x14x256x14xf32, #layout61> loc(#loc793)
+    %869 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x256>}> : (!tt.device<#device>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc794)
+    %870 = "ttnn.transpose"(%868, %869) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x256x14xf32, #layout61>, tensor<1x14x14x256xf32, #layout60>) -> tensor<1x14x14x256xf32, #layout60> loc(#loc794)
+    %871 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc795)
+    %872 = "ttnn.conv2d"(%870, %arg149, %871, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 256 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 1024 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x256xf32, #layout60>, tensor<1024x256x1x1xf32, #layout19>, tensor<1x14x14x1024xf32, #layout63>, !tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc795)
+    %873 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc796)
+    %874 = "ttnn.transpose"(%872, %873) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc796)
+    %875 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc797)
+    %876 = "ttnn.transpose"(%874, %875) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc797)
+    %877 = "ttnn.to_layout"(%arg85, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc798)
+    %878 = "ttnn.to_device"(%877, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc798)
+    %879 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc798)
+    %880 = "ttnn.multiply"(%876, %878, %879) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc798)
+    %881 = "ttnn.to_layout"(%arg86, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1024x1x1xf32, #layout5>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc799)
+    %882 = "ttnn.to_device"(%881, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1024x1x1xf32, #layout66>, !tt.device<#device>) -> tensor<1024x1x1xf32, #layout66> loc(#loc799)
+    %883 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc799)
+    %884 = "ttnn.add"(%880, %882, %883) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1024x1x1xf32, #layout66>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc799)
+    %885 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc800)
+    %886 = "ttnn.add"(%884, %826, %885) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc800)
+    %887 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1024x14x14>}> : (!tt.device<#device>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc801)
+    %888 = "ttnn.relu"(%886, %887) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x1024x14x14xf32, #layout65>) -> tensor<1x1024x14x14xf32, #layout65> loc(#loc801)
+    %889 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc802)
+    %890 = "ttnn.transpose"(%888, %889) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc802)
+    %891 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc803)
+    %892 = "ttnn.transpose"(%890, %891) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x14x14x1024xf32, #layout63>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc803)
+    %893 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x512>}> : (!tt.device<#device>) -> tensor<1x14x14x512xf32, #layout67> loc(#loc804)
+    %894 = "ttnn.conv2d"(%892, %arg150, %893, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 1024 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 512 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<512x1024x1x1xf32, #layout22>, tensor<1x14x14x512xf32, #layout67>, !tt.device<#device>) -> tensor<1x14x14x512xf32, #layout67> loc(#loc804)
+    %895 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x512x14>}> : (!tt.device<#device>) -> tensor<1x14x512x14xf32, #layout68> loc(#loc805)
+    %896 = "ttnn.transpose"(%894, %895) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x14x512xf32, #layout67>, tensor<1x14x512x14xf32, #layout68>) -> tensor<1x14x512x14xf32, #layout68> loc(#loc805)
+    %897 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x14x14>}> : (!tt.device<#device>) -> tensor<1x512x14x14xf32, #layout69> loc(#loc806)
+    %898 = "ttnn.transpose"(%896, %897) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x14x512x14xf32, #layout68>, tensor<1x512x14x14xf32, #layout69>) -> tensor<1x512x14x14xf32, #layout69> loc(#loc806)
+    %899 = "ttnn.to_layout"(%arg87, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc807)
+    %900 = "ttnn.to_device"(%899, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc807)
+    %901 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x14x14>}> : (!tt.device<#device>) -> tensor<1x512x14x14xf32, #layout69> loc(#loc807)
+    %902 = "ttnn.multiply"(%898, %900, %901) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x14x14xf32, #layout69>, tensor<512x1x1xf32, #layout56>, tensor<1x512x14x14xf32, #layout69>) -> tensor<1x512x14x14xf32, #layout69> loc(#loc807)
+    %903 = "ttnn.to_layout"(%arg88, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc808)
+    %904 = "ttnn.to_device"(%903, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc808)
+    %905 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x14x14>}> : (!tt.device<#device>) -> tensor<1x512x14x14xf32, #layout69> loc(#loc808)
+    %906 = "ttnn.add"(%902, %904, %905) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x14x14xf32, #layout69>, tensor<512x1x1xf32, #layout56>, tensor<1x512x14x14xf32, #layout69>) -> tensor<1x512x14x14xf32, #layout69> loc(#loc808)
+    %907 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x14x14>}> : (!tt.device<#device>) -> tensor<1x512x14x14xf32, #layout69> loc(#loc809)
+    %908 = "ttnn.relu"(%906, %907) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x512x14x14xf32, #layout69>, tensor<1x512x14x14xf32, #layout69>) -> tensor<1x512x14x14xf32, #layout69> loc(#loc809)
+    %909 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x512x14>}> : (!tt.device<#device>) -> tensor<1x14x512x14xf32, #layout68> loc(#loc810)
+    %910 = "ttnn.transpose"(%908, %909) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x14x14xf32, #layout69>, tensor<1x14x512x14xf32, #layout68>) -> tensor<1x14x512x14xf32, #layout68> loc(#loc810)
+    %911 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x512>}> : (!tt.device<#device>) -> tensor<1x14x14x512xf32, #layout67> loc(#loc811)
+    %912 = "ttnn.transpose"(%910, %911) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x512x14xf32, #layout68>, tensor<1x14x14x512xf32, #layout67>) -> tensor<1x14x14x512xf32, #layout67> loc(#loc811)
+    %913 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x512>}> : (!tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc812)
+    %914 = "ttnn.conv2d"(%912, %arg151, %913, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 512 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 2 : i32, stride_width = 2 : i32}> : (tensor<1x14x14x512xf32, #layout67>, tensor<512x512x3x3xf32, #layout23>, tensor<1x7x7x512xf32, #layout70>, !tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc812)
+    %915 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x512x7>}> : (!tt.device<#device>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc813)
+    %916 = "ttnn.transpose"(%914, %915) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x7x512xf32, #layout70>, tensor<1x7x512x7xf32, #layout71>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc813)
+    %917 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc814)
+    %918 = "ttnn.transpose"(%916, %917) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x7x512x7xf32, #layout71>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc814)
+    %919 = "ttnn.to_layout"(%arg89, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc815)
+    %920 = "ttnn.to_device"(%919, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc815)
+    %921 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc815)
+    %922 = "ttnn.multiply"(%918, %920, %921) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<512x1x1xf32, #layout56>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc815)
+    %923 = "ttnn.to_layout"(%arg90, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc816)
+    %924 = "ttnn.to_device"(%923, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc816)
+    %925 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc816)
+    %926 = "ttnn.add"(%922, %924, %925) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<512x1x1xf32, #layout56>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc816)
+    %927 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc817)
+    %928 = "ttnn.relu"(%926, %927) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc817)
+    %929 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x512x7>}> : (!tt.device<#device>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc818)
+    %930 = "ttnn.transpose"(%928, %929) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x7x7xf32, #layout72>, tensor<1x7x512x7xf32, #layout71>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc818)
+    %931 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x512>}> : (!tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc819)
+    %932 = "ttnn.transpose"(%930, %931) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x512x7xf32, #layout71>, tensor<1x7x7x512xf32, #layout70>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc819)
+    %933 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x2048>}> : (!tt.device<#device>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc820)
+    %934 = "ttnn.conv2d"(%932, %arg152, %933, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 7 : i32, input_width = 7 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 2048 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x7x7x512xf32, #layout70>, tensor<2048x512x1x1xf32, #layout24>, tensor<1x7x7x2048xf32, #layout73>, !tt.device<#device>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc820)
+    %935 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x2048x7>}> : (!tt.device<#device>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc821)
+    %936 = "ttnn.transpose"(%934, %935) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x7x2048xf32, #layout73>, tensor<1x7x2048x7xf32, #layout74>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc821)
+    %937 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc822)
+    %938 = "ttnn.transpose"(%936, %937) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x7x2048x7xf32, #layout74>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc822)
+    %939 = "ttnn.to_layout"(%arg91, %0) <{layout = #ttnn.layout<tile>}> : (tensor<2048x1x1xf32, #layout6>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc823)
+    %940 = "ttnn.to_device"(%939, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<2048x1x1xf32, #layout76>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc823)
+    %941 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc823)
+    %942 = "ttnn.multiply"(%938, %940, %941) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<2048x1x1xf32, #layout76>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc823)
+    %943 = "ttnn.to_layout"(%arg92, %0) <{layout = #ttnn.layout<tile>}> : (tensor<2048x1x1xf32, #layout6>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc824)
+    %944 = "ttnn.to_device"(%943, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<2048x1x1xf32, #layout76>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc824)
+    %945 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc824)
+    %946 = "ttnn.add"(%942, %944, %945) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<2048x1x1xf32, #layout76>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc824)
+    %947 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x1024x14>}> : (!tt.device<#device>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc825)
+    %948 = "ttnn.transpose"(%888, %947) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x1024x14x14xf32, #layout65>, tensor<1x14x1024x14xf32, #layout64>) -> tensor<1x14x1024x14xf32, #layout64> loc(#loc825)
+    %949 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x14x14x1024>}> : (!tt.device<#device>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc826)
+    %950 = "ttnn.transpose"(%948, %949) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x14x1024x14xf32, #layout64>, tensor<1x14x14x1024xf32, #layout63>) -> tensor<1x14x14x1024xf32, #layout63> loc(#loc826)
+    %951 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x2048>}> : (!tt.device<#device>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc827)
+    %952 = "ttnn.conv2d"(%950, %arg153, %951, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 1024 : i32, input_height = 14 : i32, input_width = 14 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 2048 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 2 : i32, stride_width = 2 : i32}> : (tensor<1x14x14x1024xf32, #layout63>, tensor<2048x1024x1x1xf32, #layout25>, tensor<1x7x7x2048xf32, #layout73>, !tt.device<#device>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc827)
+    %953 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x2048x7>}> : (!tt.device<#device>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc828)
+    %954 = "ttnn.transpose"(%952, %953) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x7x2048xf32, #layout73>, tensor<1x7x2048x7xf32, #layout74>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc828)
+    %955 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc829)
+    %956 = "ttnn.transpose"(%954, %955) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x7x2048x7xf32, #layout74>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc829)
+    %957 = "ttnn.to_layout"(%arg93, %0) <{layout = #ttnn.layout<tile>}> : (tensor<2048x1x1xf32, #layout6>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc830)
+    %958 = "ttnn.to_device"(%957, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<2048x1x1xf32, #layout76>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc830)
+    %959 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc830)
+    %960 = "ttnn.multiply"(%956, %958, %959) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<2048x1x1xf32, #layout76>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc830)
+    %961 = "ttnn.to_layout"(%arg94, %0) <{layout = #ttnn.layout<tile>}> : (tensor<2048x1x1xf32, #layout6>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc831)
+    %962 = "ttnn.to_device"(%961, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<2048x1x1xf32, #layout76>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc831)
+    %963 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc831)
+    %964 = "ttnn.add"(%960, %962, %963) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<2048x1x1xf32, #layout76>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc831)
+    %965 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc832)
+    %966 = "ttnn.add"(%946, %964, %965) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<1x2048x7x7xf32, #layout75>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc832)
+    %967 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc833)
+    %968 = "ttnn.relu"(%966, %967) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc833)
+    %969 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x2048x7>}> : (!tt.device<#device>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc834)
+    %970 = "ttnn.transpose"(%968, %969) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<1x7x2048x7xf32, #layout74>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc834)
+    %971 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x2048>}> : (!tt.device<#device>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc835)
+    %972 = "ttnn.transpose"(%970, %971) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x2048x7xf32, #layout74>, tensor<1x7x7x2048xf32, #layout73>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc835)
+    %973 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x512>}> : (!tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc836)
+    %974 = "ttnn.conv2d"(%972, %arg154, %973, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 2048 : i32, input_height = 7 : i32, input_width = 7 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 512 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x7x7x2048xf32, #layout73>, tensor<512x2048x1x1xf32, #layout26>, tensor<1x7x7x512xf32, #layout70>, !tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc836)
+    %975 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x512x7>}> : (!tt.device<#device>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc837)
+    %976 = "ttnn.transpose"(%974, %975) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x7x512xf32, #layout70>, tensor<1x7x512x7xf32, #layout71>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc837)
+    %977 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc838)
+    %978 = "ttnn.transpose"(%976, %977) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x7x512x7xf32, #layout71>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc838)
+    %979 = "ttnn.to_layout"(%arg95, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc839)
+    %980 = "ttnn.to_device"(%979, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc839)
+    %981 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc839)
+    %982 = "ttnn.multiply"(%978, %980, %981) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<512x1x1xf32, #layout56>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc839)
+    %983 = "ttnn.to_layout"(%arg96, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc840)
+    %984 = "ttnn.to_device"(%983, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc840)
+    %985 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc840)
+    %986 = "ttnn.add"(%982, %984, %985) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<512x1x1xf32, #layout56>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc840)
+    %987 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc841)
+    %988 = "ttnn.relu"(%986, %987) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc841)
+    %989 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x512x7>}> : (!tt.device<#device>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc842)
+    %990 = "ttnn.transpose"(%988, %989) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x7x7xf32, #layout72>, tensor<1x7x512x7xf32, #layout71>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc842)
+    %991 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x512>}> : (!tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc843)
+    %992 = "ttnn.transpose"(%990, %991) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x512x7xf32, #layout71>, tensor<1x7x7x512xf32, #layout70>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc843)
+    %993 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x512>}> : (!tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc844)
+    %994 = "ttnn.conv2d"(%992, %arg155, %993, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 7 : i32, input_width = 7 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 512 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x7x7x512xf32, #layout70>, tensor<512x512x3x3xf32, #layout23>, tensor<1x7x7x512xf32, #layout70>, !tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc844)
+    %995 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x512x7>}> : (!tt.device<#device>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc845)
+    %996 = "ttnn.transpose"(%994, %995) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x7x512xf32, #layout70>, tensor<1x7x512x7xf32, #layout71>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc845)
+    %997 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc846)
+    %998 = "ttnn.transpose"(%996, %997) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x7x512x7xf32, #layout71>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc846)
+    %999 = "ttnn.to_layout"(%arg97, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc847)
+    %1000 = "ttnn.to_device"(%999, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc847)
+    %1001 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc847)
+    %1002 = "ttnn.multiply"(%998, %1000, %1001) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<512x1x1xf32, #layout56>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc847)
+    %1003 = "ttnn.to_layout"(%arg98, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc848)
+    %1004 = "ttnn.to_device"(%1003, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc848)
+    %1005 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc848)
+    %1006 = "ttnn.add"(%1002, %1004, %1005) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<512x1x1xf32, #layout56>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc848)
+    %1007 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc849)
+    %1008 = "ttnn.relu"(%1006, %1007) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc849)
+    %1009 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x512x7>}> : (!tt.device<#device>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc850)
+    %1010 = "ttnn.transpose"(%1008, %1009) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x7x7xf32, #layout72>, tensor<1x7x512x7xf32, #layout71>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc850)
+    %1011 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x512>}> : (!tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc851)
+    %1012 = "ttnn.transpose"(%1010, %1011) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x512x7xf32, #layout71>, tensor<1x7x7x512xf32, #layout70>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc851)
+    %1013 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x2048>}> : (!tt.device<#device>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc852)
+    %1014 = "ttnn.conv2d"(%1012, %arg156, %1013, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 7 : i32, input_width = 7 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 2048 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x7x7x512xf32, #layout70>, tensor<2048x512x1x1xf32, #layout24>, tensor<1x7x7x2048xf32, #layout73>, !tt.device<#device>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc852)
+    %1015 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x2048x7>}> : (!tt.device<#device>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc853)
+    %1016 = "ttnn.transpose"(%1014, %1015) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x7x2048xf32, #layout73>, tensor<1x7x2048x7xf32, #layout74>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc853)
+    %1017 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc854)
+    %1018 = "ttnn.transpose"(%1016, %1017) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x7x2048x7xf32, #layout74>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc854)
+    %1019 = "ttnn.to_layout"(%arg99, %0) <{layout = #ttnn.layout<tile>}> : (tensor<2048x1x1xf32, #layout6>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc855)
+    %1020 = "ttnn.to_device"(%1019, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<2048x1x1xf32, #layout76>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc855)
+    %1021 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc855)
+    %1022 = "ttnn.multiply"(%1018, %1020, %1021) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<2048x1x1xf32, #layout76>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc855)
+    %1023 = "ttnn.to_layout"(%arg100, %0) <{layout = #ttnn.layout<tile>}> : (tensor<2048x1x1xf32, #layout6>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc856)
+    %1024 = "ttnn.to_device"(%1023, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<2048x1x1xf32, #layout76>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc856)
+    %1025 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc856)
+    %1026 = "ttnn.add"(%1022, %1024, %1025) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<2048x1x1xf32, #layout76>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc856)
+    %1027 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc857)
+    %1028 = "ttnn.add"(%1026, %968, %1027) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<1x2048x7x7xf32, #layout75>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc857)
+    %1029 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc858)
+    %1030 = "ttnn.relu"(%1028, %1029) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc858)
+    %1031 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x2048x7>}> : (!tt.device<#device>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc859)
+    %1032 = "ttnn.transpose"(%1030, %1031) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<1x7x2048x7xf32, #layout74>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc859)
+    %1033 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x2048>}> : (!tt.device<#device>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc860)
+    %1034 = "ttnn.transpose"(%1032, %1033) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x2048x7xf32, #layout74>, tensor<1x7x7x2048xf32, #layout73>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc860)
+    %1035 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x512>}> : (!tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc861)
+    %1036 = "ttnn.conv2d"(%1034, %arg157, %1035, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 2048 : i32, input_height = 7 : i32, input_width = 7 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 512 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x7x7x2048xf32, #layout73>, tensor<512x2048x1x1xf32, #layout26>, tensor<1x7x7x512xf32, #layout70>, !tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc861)
+    %1037 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x512x7>}> : (!tt.device<#device>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc862)
+    %1038 = "ttnn.transpose"(%1036, %1037) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x7x512xf32, #layout70>, tensor<1x7x512x7xf32, #layout71>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc862)
+    %1039 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc863)
+    %1040 = "ttnn.transpose"(%1038, %1039) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x7x512x7xf32, #layout71>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc863)
+    %1041 = "ttnn.to_layout"(%arg101, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc864)
+    %1042 = "ttnn.to_device"(%1041, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc864)
+    %1043 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc864)
+    %1044 = "ttnn.multiply"(%1040, %1042, %1043) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<512x1x1xf32, #layout56>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc864)
+    %1045 = "ttnn.to_layout"(%arg102, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc865)
+    %1046 = "ttnn.to_device"(%1045, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc865)
+    %1047 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc865)
+    %1048 = "ttnn.add"(%1044, %1046, %1047) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<512x1x1xf32, #layout56>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc865)
+    %1049 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc866)
+    %1050 = "ttnn.relu"(%1048, %1049) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc866)
+    %1051 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x512x7>}> : (!tt.device<#device>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc867)
+    %1052 = "ttnn.transpose"(%1050, %1051) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x7x7xf32, #layout72>, tensor<1x7x512x7xf32, #layout71>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc867)
+    %1053 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x512>}> : (!tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc868)
+    %1054 = "ttnn.transpose"(%1052, %1053) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x512x7xf32, #layout71>, tensor<1x7x7x512xf32, #layout70>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc868)
+    %1055 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x512>}> : (!tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc869)
+    %1056 = "ttnn.conv2d"(%1054, %arg158, %1055, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 7 : i32, input_width = 7 : i32, kernel_height = 3 : i32, kernel_width = 3 : i32, out_channels = 512 : i32, padding_height = 1 : i32, padding_width = 1 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x7x7x512xf32, #layout70>, tensor<512x512x3x3xf32, #layout23>, tensor<1x7x7x512xf32, #layout70>, !tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc869)
+    %1057 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x512x7>}> : (!tt.device<#device>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc870)
+    %1058 = "ttnn.transpose"(%1056, %1057) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x7x512xf32, #layout70>, tensor<1x7x512x7xf32, #layout71>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc870)
+    %1059 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc871)
+    %1060 = "ttnn.transpose"(%1058, %1059) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x7x512x7xf32, #layout71>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc871)
+    %1061 = "ttnn.to_layout"(%arg103, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc872)
+    %1062 = "ttnn.to_device"(%1061, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc872)
+    %1063 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc872)
+    %1064 = "ttnn.multiply"(%1060, %1062, %1063) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<512x1x1xf32, #layout56>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc872)
+    %1065 = "ttnn.to_layout"(%arg104, %0) <{layout = #ttnn.layout<tile>}> : (tensor<512x1x1xf32, #layout4>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc873)
+    %1066 = "ttnn.to_device"(%1065, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<512x1x1xf32, #layout56>, !tt.device<#device>) -> tensor<512x1x1xf32, #layout56> loc(#loc873)
+    %1067 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc873)
+    %1068 = "ttnn.add"(%1064, %1066, %1067) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<512x1x1xf32, #layout56>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc873)
+    %1069 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x512x7x7>}> : (!tt.device<#device>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc874)
+    %1070 = "ttnn.relu"(%1068, %1069) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x512x7x7xf32, #layout72>, tensor<1x512x7x7xf32, #layout72>) -> tensor<1x512x7x7xf32, #layout72> loc(#loc874)
+    %1071 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x512x7>}> : (!tt.device<#device>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc875)
+    %1072 = "ttnn.transpose"(%1070, %1071) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x512x7x7xf32, #layout72>, tensor<1x7x512x7xf32, #layout71>) -> tensor<1x7x512x7xf32, #layout71> loc(#loc875)
+    %1073 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x512>}> : (!tt.device<#device>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc876)
+    %1074 = "ttnn.transpose"(%1072, %1073) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x512x7xf32, #layout71>, tensor<1x7x7x512xf32, #layout70>) -> tensor<1x7x7x512xf32, #layout70> loc(#loc876)
+    %1075 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x7x2048>}> : (!tt.device<#device>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc877)
+    %1076 = "ttnn.conv2d"(%1074, %arg159, %1075, %0) <{batch_size = 1 : i32, dilation_height = 1 : i32, dilation_width = 1 : i32, groups = 1 : i32, in_channels = 512 : i32, input_height = 7 : i32, input_width = 7 : i32, kernel_height = 1 : i32, kernel_width = 1 : i32, out_channels = 2048 : i32, padding_height = 0 : i32, padding_width = 0 : i32, stride_height = 1 : i32, stride_width = 1 : i32}> : (tensor<1x7x7x512xf32, #layout70>, tensor<2048x512x1x1xf32, #layout24>, tensor<1x7x7x2048xf32, #layout73>, !tt.device<#device>) -> tensor<1x7x7x2048xf32, #layout73> loc(#loc877)
+    %1077 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x7x2048x7>}> : (!tt.device<#device>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc878)
+    %1078 = "ttnn.transpose"(%1076, %1077) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x7x7x2048xf32, #layout73>, tensor<1x7x2048x7xf32, #layout74>) -> tensor<1x7x2048x7xf32, #layout74> loc(#loc878)
+    %1079 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc879)
+    %1080 = "ttnn.transpose"(%1078, %1079) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x7x2048x7xf32, #layout74>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc879)
+    %1081 = "ttnn.to_layout"(%arg105, %0) <{layout = #ttnn.layout<tile>}> : (tensor<2048x1x1xf32, #layout6>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc880)
+    %1082 = "ttnn.to_device"(%1081, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<2048x1x1xf32, #layout76>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc880)
+    %1083 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc880)
+    %1084 = "ttnn.multiply"(%1080, %1082, %1083) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<2048x1x1xf32, #layout76>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc880)
+    %1085 = "ttnn.to_layout"(%arg106, %0) <{layout = #ttnn.layout<tile>}> : (tensor<2048x1x1xf32, #layout6>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc881)
+    %1086 = "ttnn.to_device"(%1085, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<2048x1x1xf32, #layout76>, !tt.device<#device>) -> tensor<2048x1x1xf32, #layout76> loc(#loc881)
+    %1087 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc881)
+    %1088 = "ttnn.add"(%1084, %1086, %1087) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<2048x1x1xf32, #layout76>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc881)
+    %1089 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc882)
+    %1090 = "ttnn.add"(%1088, %1030, %1089) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<1x2048x7x7xf32, #layout75>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc882)
+    %1091 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x7x7>}> : (!tt.device<#device>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc883)
+    %1092 = "ttnn.relu"(%1090, %1091) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<1x2048x7x7xf32, #layout75>) -> tensor<1x2048x7x7xf32, #layout75> loc(#loc883)
+    %1093 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1x2048x49>}> : (!tt.device<#device>) -> tensor<1x1x2048x49xf32, #layout77> loc(#loc884)
+    %1094 = "ttnn.reshape"(%1092, %1093) <{shape = [1 : i32, 1 : i32, 2048 : i32, 49 : i32]}> : (tensor<1x2048x7x7xf32, #layout75>, tensor<1x1x2048x49xf32, #layout77>) -> tensor<1x1x2048x49xf32, #layout77> loc(#loc884)
+    %1095 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1x49x2048>}> : (!tt.device<#device>) -> tensor<1x1x49x2048xf32, #layout78> loc(#loc885)
+    %1096 = "ttnn.transpose"(%1094, %1095) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x1x2048x49xf32, #layout77>, tensor<1x1x49x2048xf32, #layout78>) -> tensor<1x1x49x2048xf32, #layout78> loc(#loc885)
+    %1097 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1x1x2048>}> : (!tt.device<#device>) -> tensor<1x1x1x2048xf32, #layout79> loc(#loc886)
+    %1098 = "ttnn.mean"(%1096, %1097) <{keep_dim = true}> : (tensor<1x1x49x2048xf32, #layout78>, tensor<1x1x1x2048xf32, #layout79>) -> tensor<1x1x1x2048xf32, #layout79> loc(#loc886)
+    %1099 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x1x1>}> : (!tt.device<#device>) -> tensor<1x2048x1x1xf32, #layout80> loc(#loc887)
+    %1100 = "ttnn.reshape"(%1098, %1099) <{shape = [1 : i32, 2048 : i32, 1 : i32, 1 : i32]}> : (tensor<1x1x1x2048xf32, #layout79>, tensor<1x2048x1x1xf32, #layout80>) -> tensor<1x2048x1x1xf32, #layout80> loc(#loc887)
+    %1101 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048x1>}> : (!tt.device<#device>) -> tensor<1x2048x1xf32, #layout81> loc(#loc888)
+    %1102 = "ttnn.reshape"(%1100, %1101) <{shape = [1 : i32, 2048 : i32, 1 : i32]}> : (tensor<1x2048x1x1xf32, #layout80>, tensor<1x2048x1xf32, #layout81>) -> tensor<1x2048x1xf32, #layout81> loc(#loc888)
+    %1103 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x2048>}> : (!tt.device<#device>) -> tensor<1x2048xf32, #layout82> loc(#loc889)
+    %1104 = "ttnn.reshape"(%1102, %1103) <{shape = [1 : i32, 2048 : i32]}> : (tensor<1x2048x1xf32, #layout81>, tensor<1x2048xf32, #layout82>) -> tensor<1x2048xf32, #layout82> loc(#loc889)
+    %1105 = "ttnn.to_layout"(%arg160, %0) <{layout = #ttnn.layout<tile>}> : (tensor<2048x1000xf32, #layout27>, !tt.device<#device>) -> tensor<2048x1000xf32, #layout83> loc(#loc890)
+    %1106 = "ttnn.to_device"(%1105, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<2048x1000xf32, #layout83>, !tt.device<#device>) -> tensor<2048x1000xf32, #layout83> loc(#loc890)
+    %1107 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1000>}> : (!tt.device<#device>) -> tensor<1x1000xf32, #layout84> loc(#loc890)
+    %1108 = "ttnn.matmul"(%1104, %1106, %1107) : (tensor<1x2048xf32, #layout82>, tensor<2048x1000xf32, #layout83>, tensor<1x1000xf32, #layout84>) -> tensor<1x1000xf32, #layout84> loc(#loc890)
+    %1109 = "ttnn.to_layout"(%arg161, %0) <{layout = #ttnn.layout<tile>}> : (tensor<1000xf32, #layout28>, !tt.device<#device>) -> tensor<1000xf32, #layout85> loc(#loc891)
+    %1110 = "ttnn.to_device"(%1109, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>>}> : (tensor<1000xf32, #layout85>, !tt.device<#device>) -> tensor<1000xf32, #layout85> loc(#loc891)
+    %1111 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, <dram>>, shape = #ttnn.shape<1x1000>}> : (!tt.device<#device>) -> tensor<1x1000xf32, #layout84> loc(#loc891)
+    %1112 = "ttnn.add"(%1108, %1110, %1111) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x1000xf32, #layout84>, tensor<1000xf32, #layout85>, tensor<1x1000xf32, #layout84>) -> tensor<1x1000xf32, #layout84> loc(#loc891)
+    %1113 = "ttnn.to_memory_config"(%1112, %0) : (tensor<1x1000xf32, #layout84>, !tt.device<#device>) -> tensor<1x1000xf32, #layout29> loc(#loc446)
+    return %1113 : tensor<1x1000xf32, #layout29> loc(#loc446)
+  } loc(#loc)
+} loc(#loc)
+#loc1 = loc("forward":4294967295:2951)
+#loc2 = loc("forward":4294967295:2952)
+#loc3 = loc("forward":4294967295:2954)
+#loc4 = loc("forward":4294967295:2955)
+#loc5 = loc("forward":4294967295:2956)
+#loc6 = loc("forward":4294967295:2958)
+#loc7 = loc("forward":4294967295:2960)
+#loc8 = loc("forward":4294967295:2961)
+#loc9 = loc("forward":4294967295:2962)
+#loc10 = loc("forward":4294967295:2963)
+#loc11 = loc("forward":4294967295:2964)
+#loc12 = loc("forward":4294967295:2966)
+#loc13 = loc("forward":4294967295:2967)
+#loc14 = loc("forward":4294967295:2968)
+#loc15 = loc("forward":4294967295:2970)
+#loc16 = loc("forward":4294967295:2972)
+#loc17 = loc("forward":4294967295:2973)
+#loc18 = loc("forward":4294967295:2974)
+#loc19 = loc("forward":4294967295:2975)
+#loc20 = loc("forward":4294967295:2977)
+#loc21 = loc("forward":4294967295:2978)
+#loc22 = loc("forward":4294967295:2979)
+#loc23 = loc("forward":4294967295:2981)
+#loc24 = loc("forward":4294967295:2983)
+#loc25 = loc("forward":4294967295:2984)
+#loc26 = loc("forward":4294967295:2985)
+#loc27 = loc("forward":4294967295:2986)
+#loc28 = loc("forward":4294967295:2988)
+#loc29 = loc("forward":4294967295:2989)
+#loc30 = loc("forward":4294967295:2990)
+#loc31 = loc("forward":4294967295:2992)
+#loc32 = loc("forward":4294967295:2994)
+#loc33 = loc("forward":4294967295:2995)
+#loc34 = loc("forward":4294967295:2996)
+#loc35 = loc("forward":4294967295:2998)
+#loc36 = loc("forward":4294967295:2999)
+#loc37 = loc("forward":4294967295:3000)
+#loc38 = loc("forward":4294967295:3002)
+#loc39 = loc("forward":4294967295:3004)
+#loc40 = loc("forward":4294967295:3005)
+#loc41 = loc("forward":4294967295:3006)
+#loc42 = loc("forward":4294967295:3007)
+#loc43 = loc("forward":4294967295:3008)
+#loc44 = loc("forward":4294967295:3010)
+#loc45 = loc("forward":4294967295:3011)
+#loc46 = loc("forward":4294967295:3012)
+#loc47 = loc("forward":4294967295:3014)
+#loc48 = loc("forward":4294967295:3016)
+#loc49 = loc("forward":4294967295:3017)
+#loc50 = loc("forward":4294967295:3018)
+#loc51 = loc("forward":4294967295:3019)
+#loc52 = loc("forward":4294967295:3021)
+#loc53 = loc("forward":4294967295:3022)
+#loc54 = loc("forward":4294967295:3023)
+#loc55 = loc("forward":4294967295:3025)
+#loc56 = loc("forward":4294967295:3027)
+#loc57 = loc("forward":4294967295:3028)
+#loc58 = loc("forward":4294967295:3029)
+#loc59 = loc("forward":4294967295:3030)
+#loc60 = loc("forward":4294967295:3032)
+#loc61 = loc("forward":4294967295:3033)
+#loc62 = loc("forward":4294967295:3034)
+#loc63 = loc("forward":4294967295:3036)
+#loc64 = loc("forward":4294967295:3038)
+#loc65 = loc("forward":4294967295:3039)
+#loc66 = loc("forward":4294967295:3040)
+#loc67 = loc("forward":4294967295:3041)
+#loc68 = loc("forward":4294967295:3042)
+#loc69 = loc("forward":4294967295:3044)
+#loc70 = loc("forward":4294967295:3045)
+#loc71 = loc("forward":4294967295:3046)
+#loc72 = loc("forward":4294967295:3048)
+#loc73 = loc("forward":4294967295:3050)
+#loc74 = loc("forward":4294967295:3051)
+#loc75 = loc("forward":4294967295:3052)
+#loc76 = loc("forward":4294967295:3053)
+#loc77 = loc("forward":4294967295:3055)
+#loc78 = loc("forward":4294967295:3056)
+#loc79 = loc("forward":4294967295:3057)
+#loc80 = loc("forward":4294967295:3059)
+#loc81 = loc("forward":4294967295:3061)
+#loc82 = loc("forward":4294967295:3062)
+#loc83 = loc("forward":4294967295:3063)
+#loc84 = loc("forward":4294967295:3064)
+#loc85 = loc("forward":4294967295:3066)
+#loc86 = loc("forward":4294967295:3067)
+#loc87 = loc("forward":4294967295:3068)
+#loc88 = loc("forward":4294967295:3070)
+#loc89 = loc("forward":4294967295:3072)
+#loc90 = loc("forward":4294967295:3073)
+#loc91 = loc("forward":4294967295:3074)
+#loc92 = loc("forward":4294967295:3075)
+#loc93 = loc("forward":4294967295:3076)
+#loc94 = loc("forward":4294967295:3078)
+#loc95 = loc("forward":4294967295:3079)
+#loc96 = loc("forward":4294967295:3080)
+#loc97 = loc("forward":4294967295:3082)
+#loc98 = loc("forward":4294967295:3084)
+#loc99 = loc("forward":4294967295:3085)
+#loc100 = loc("forward":4294967295:3086)
+#loc101 = loc("forward":4294967295:3087)
+#loc102 = loc("forward":4294967295:3089)
+#loc103 = loc("forward":4294967295:3090)
+#loc104 = loc("forward":4294967295:3091)
+#loc105 = loc("forward":4294967295:3093)
+#loc106 = loc("forward":4294967295:3095)
+#loc107 = loc("forward":4294967295:3096)
+#loc108 = loc("forward":4294967295:3097)
+#loc109 = loc("forward":4294967295:3098)
+#loc110 = loc("forward":4294967295:3100)
+#loc111 = loc("forward":4294967295:3101)
+#loc112 = loc("forward":4294967295:3102)
+#loc113 = loc("forward":4294967295:3104)
+#loc114 = loc("forward":4294967295:3106)
+#loc115 = loc("forward":4294967295:3107)
+#loc116 = loc("forward":4294967295:3108)
+#loc117 = loc("forward":4294967295:3110)
+#loc118 = loc("forward":4294967295:3111)
+#loc119 = loc("forward":4294967295:3112)
+#loc120 = loc("forward":4294967295:3114)
+#loc121 = loc("forward":4294967295:3116)
+#loc122 = loc("forward":4294967295:3117)
+#loc123 = loc("forward":4294967295:3118)
+#loc124 = loc("forward":4294967295:3119)
+#loc125 = loc("forward":4294967295:3120)
+#loc126 = loc("forward":4294967295:3122)
+#loc127 = loc("forward":4294967295:3123)
+#loc128 = loc("forward":4294967295:3124)
+#loc129 = loc("forward":4294967295:3126)
+#loc130 = loc("forward":4294967295:3128)
+#loc131 = loc("forward":4294967295:3129)
+#loc132 = loc("forward":4294967295:3130)
+#loc133 = loc("forward":4294967295:3131)
+#loc134 = loc("forward":4294967295:3133)
+#loc135 = loc("forward":4294967295:3134)
+#loc136 = loc("forward":4294967295:3135)
+#loc137 = loc("forward":4294967295:3137)
+#loc138 = loc("forward":4294967295:3139)
+#loc139 = loc("forward":4294967295:3140)
+#loc140 = loc("forward":4294967295:3141)
+#loc141 = loc("forward":4294967295:3142)
+#loc142 = loc("forward":4294967295:3144)
+#loc143 = loc("forward":4294967295:3145)
+#loc144 = loc("forward":4294967295:3146)
+#loc145 = loc("forward":4294967295:3148)
+#loc146 = loc("forward":4294967295:3150)
+#loc147 = loc("forward":4294967295:3151)
+#loc148 = loc("forward":4294967295:3152)
+#loc149 = loc("forward":4294967295:3153)
+#loc150 = loc("forward":4294967295:3154)
+#loc151 = loc("forward":4294967295:3156)
+#loc152 = loc("forward":4294967295:3157)
+#loc153 = loc("forward":4294967295:3158)
+#loc154 = loc("forward":4294967295:3160)
+#loc155 = loc("forward":4294967295:3162)
+#loc156 = loc("forward":4294967295:3163)
+#loc157 = loc("forward":4294967295:3164)
+#loc158 = loc("forward":4294967295:3165)
+#loc159 = loc("forward":4294967295:3167)
+#loc160 = loc("forward":4294967295:3168)
+#loc161 = loc("forward":4294967295:3169)
+#loc162 = loc("forward":4294967295:3171)
+#loc163 = loc("forward":4294967295:3173)
+#loc164 = loc("forward":4294967295:3174)
+#loc165 = loc("forward":4294967295:3175)
+#loc166 = loc("forward":4294967295:3176)
+#loc167 = loc("forward":4294967295:3178)
+#loc168 = loc("forward":4294967295:3179)
+#loc169 = loc("forward":4294967295:3180)
+#loc170 = loc("forward":4294967295:3182)
+#loc171 = loc("forward":4294967295:3184)
+#loc172 = loc("forward":4294967295:3185)
+#loc173 = loc("forward":4294967295:3186)
+#loc174 = loc("forward":4294967295:3187)
+#loc175 = loc("forward":4294967295:3188)
+#loc176 = loc("forward":4294967295:3190)
+#loc177 = loc("forward":4294967295:3191)
+#loc178 = loc("forward":4294967295:3192)
+#loc179 = loc("forward":4294967295:3194)
+#loc180 = loc("forward":4294967295:3196)
+#loc181 = loc("forward":4294967295:3197)
+#loc182 = loc("forward":4294967295:3198)
+#loc183 = loc("forward":4294967295:3199)
+#loc184 = loc("forward":4294967295:3201)
+#loc185 = loc("forward":4294967295:3202)
+#loc186 = loc("forward":4294967295:3203)
+#loc187 = loc("forward":4294967295:3205)
+#loc188 = loc("forward":4294967295:3207)
+#loc189 = loc("forward":4294967295:3208)
+#loc190 = loc("forward":4294967295:3209)
+#loc191 = loc("forward":4294967295:3210)
+#loc192 = loc("forward":4294967295:3212)
+#loc193 = loc("forward":4294967295:3213)
+#loc194 = loc("forward":4294967295:3214)
+#loc195 = loc("forward":4294967295:3216)
+#loc196 = loc("forward":4294967295:3218)
+#loc197 = loc("forward":4294967295:3219)
+#loc198 = loc("forward":4294967295:3220)
+#loc199 = loc("forward":4294967295:3221)
+#loc200 = loc("forward":4294967295:3222)
+#loc201 = loc("forward":4294967295:3224)
+#loc202 = loc("forward":4294967295:3225)
+#loc203 = loc("forward":4294967295:3226)
+#loc204 = loc("forward":4294967295:3228)
+#loc205 = loc("forward":4294967295:3230)
+#loc206 = loc("forward":4294967295:3231)
+#loc207 = loc("forward":4294967295:3232)
+#loc208 = loc("forward":4294967295:3233)
+#loc209 = loc("forward":4294967295:3235)
+#loc210 = loc("forward":4294967295:3236)
+#loc211 = loc("forward":4294967295:3237)
+#loc212 = loc("forward":4294967295:3239)
+#loc213 = loc("forward":4294967295:3241)
+#loc214 = loc("forward":4294967295:3242)
+#loc215 = loc("forward":4294967295:3243)
+#loc216 = loc("forward":4294967295:3244)
+#loc217 = loc("forward":4294967295:3246)
+#loc218 = loc("forward":4294967295:3247)
+#loc219 = loc("forward":4294967295:3248)
+#loc220 = loc("forward":4294967295:3250)
+#loc221 = loc("forward":4294967295:3252)
+#loc222 = loc("forward":4294967295:3253)
+#loc223 = loc("forward":4294967295:3254)
+#loc224 = loc("forward":4294967295:3256)
+#loc225 = loc("forward":4294967295:3257)
+#loc226 = loc("forward":4294967295:3258)
+#loc227 = loc("forward":4294967295:3260)
+#loc228 = loc("forward":4294967295:3262)
+#loc229 = loc("forward":4294967295:3263)
+#loc230 = loc("forward":4294967295:3264)
+#loc231 = loc("forward":4294967295:3265)
+#loc232 = loc("forward":4294967295:3266)
+#loc233 = loc("forward":4294967295:3268)
+#loc234 = loc("forward":4294967295:3269)
+#loc235 = loc("forward":4294967295:3270)
+#loc236 = loc("forward":4294967295:3272)
+#loc237 = loc("forward":4294967295:3274)
+#loc238 = loc("forward":4294967295:3275)
+#loc239 = loc("forward":4294967295:3276)
+#loc240 = loc("forward":4294967295:3277)
+#loc241 = loc("forward":4294967295:3279)
+#loc242 = loc("forward":4294967295:3280)
+#loc243 = loc("forward":4294967295:3281)
+#loc244 = loc("forward":4294967295:3283)
+#loc245 = loc("forward":4294967295:3285)
+#loc246 = loc("forward":4294967295:3286)
+#loc247 = loc("forward":4294967295:3287)
+#loc248 = loc("forward":4294967295:3288)
+#loc249 = loc("forward":4294967295:3290)
+#loc250 = loc("forward":4294967295:3291)
+#loc251 = loc("forward":4294967295:3292)
+#loc252 = loc("forward":4294967295:3294)
+#loc253 = loc("forward":4294967295:3296)
+#loc254 = loc("forward":4294967295:3297)
+#loc255 = loc("forward":4294967295:3298)
+#loc256 = loc("forward":4294967295:3299)
+#loc257 = loc("forward":4294967295:3300)
+#loc258 = loc("forward":4294967295:3302)
+#loc259 = loc("forward":4294967295:3303)
+#loc260 = loc("forward":4294967295:3304)
+#loc261 = loc("forward":4294967295:3306)
+#loc262 = loc("forward":4294967295:3308)
+#loc263 = loc("forward":4294967295:3309)
+#loc264 = loc("forward":4294967295:3310)
+#loc265 = loc("forward":4294967295:3311)
+#loc266 = loc("forward":4294967295:3313)
+#loc267 = loc("forward":4294967295:3314)
+#loc268 = loc("forward":4294967295:3315)
+#loc269 = loc("forward":4294967295:3317)
+#loc270 = loc("forward":4294967295:3319)
+#loc271 = loc("forward":4294967295:3320)
+#loc272 = loc("forward":4294967295:3321)
+#loc273 = loc("forward":4294967295:3322)
+#loc274 = loc("forward":4294967295:3324)
+#loc275 = loc("forward":4294967295:3325)
+#loc276 = loc("forward":4294967295:3326)
+#loc277 = loc("forward":4294967295:3328)
+#loc278 = loc("forward":4294967295:3330)
+#loc279 = loc("forward":4294967295:3331)
+#loc280 = loc("forward":4294967295:3332)
+#loc281 = loc("forward":4294967295:3333)
+#loc282 = loc("forward":4294967295:3334)
+#loc283 = loc("forward":4294967295:3336)
+#loc284 = loc("forward":4294967295:3337)
+#loc285 = loc("forward":4294967295:3338)
+#loc286 = loc("forward":4294967295:3340)
+#loc287 = loc("forward":4294967295:3342)
+#loc288 = loc("forward":4294967295:3343)
+#loc289 = loc("forward":4294967295:3344)
+#loc290 = loc("forward":4294967295:3345)
+#loc291 = loc("forward":4294967295:3347)
+#loc292 = loc("forward":4294967295:3348)
+#loc293 = loc("forward":4294967295:3349)
+#loc294 = loc("forward":4294967295:3351)
+#loc295 = loc("forward":4294967295:3353)
+#loc296 = loc("forward":4294967295:3354)
+#loc297 = loc("forward":4294967295:3355)
+#loc298 = loc("forward":4294967295:3356)
+#loc299 = loc("forward":4294967295:3358)
+#loc300 = loc("forward":4294967295:3359)
+#loc301 = loc("forward":4294967295:3360)
+#loc302 = loc("forward":4294967295:3362)
+#loc303 = loc("forward":4294967295:3364)
+#loc304 = loc("forward":4294967295:3365)
+#loc305 = loc("forward":4294967295:3366)
+#loc306 = loc("forward":4294967295:3367)
+#loc307 = loc("forward":4294967295:3368)
+#loc308 = loc("forward":4294967295:3370)
+#loc309 = loc("forward":4294967295:3371)
+#loc310 = loc("forward":4294967295:3372)
+#loc311 = loc("forward":4294967295:3374)
+#loc312 = loc("forward":4294967295:3376)
+#loc313 = loc("forward":4294967295:3377)
+#loc314 = loc("forward":4294967295:3378)
+#loc315 = loc("forward":4294967295:3379)
+#loc316 = loc("forward":4294967295:3381)
+#loc317 = loc("forward":4294967295:3382)
+#loc318 = loc("forward":4294967295:3383)
+#loc319 = loc("forward":4294967295:3385)
+#loc320 = loc("forward":4294967295:3387)
+#loc321 = loc("forward":4294967295:3388)
+#loc322 = loc("forward":4294967295:3389)
+#loc323 = loc("forward":4294967295:3390)
+#loc324 = loc("forward":4294967295:3392)
+#loc325 = loc("forward":4294967295:3393)
+#loc326 = loc("forward":4294967295:3394)
+#loc327 = loc("forward":4294967295:3396)
+#loc328 = loc("forward":4294967295:3398)
+#loc329 = loc("forward":4294967295:3399)
+#loc330 = loc("forward":4294967295:3400)
+#loc331 = loc("forward":4294967295:3401)
+#loc332 = loc("forward":4294967295:3402)
+#loc333 = loc("forward":4294967295:3404)
+#loc334 = loc("forward":4294967295:3405)
+#loc335 = loc("forward":4294967295:3406)
+#loc336 = loc("forward":4294967295:3408)
+#loc337 = loc("forward":4294967295:3410)
+#loc338 = loc("forward":4294967295:3411)
+#loc339 = loc("forward":4294967295:3412)
+#loc340 = loc("forward":4294967295:3413)
+#loc341 = loc("forward":4294967295:3415)
+#loc342 = loc("forward":4294967295:3416)
+#loc343 = loc("forward":4294967295:3417)
+#loc344 = loc("forward":4294967295:3419)
+#loc345 = loc("forward":4294967295:3421)
+#loc346 = loc("forward":4294967295:3422)
+#loc347 = loc("forward":4294967295:3423)
+#loc348 = loc("forward":4294967295:3424)
+#loc349 = loc("forward":4294967295:3426)
+#loc350 = loc("forward":4294967295:3427)
+#loc351 = loc("forward":4294967295:3428)
+#loc352 = loc("forward":4294967295:3430)
+#loc353 = loc("forward":4294967295:3432)
+#loc354 = loc("forward":4294967295:3433)
+#loc355 = loc("forward":4294967295:3434)
+#loc356 = loc("forward":4294967295:3435)
+#loc357 = loc("forward":4294967295:3436)
+#loc358 = loc("forward":4294967295:3438)
+#loc359 = loc("forward":4294967295:3439)
+#loc360 = loc("forward":4294967295:3440)
+#loc361 = loc("forward":4294967295:3442)
+#loc362 = loc("forward":4294967295:3444)
+#loc363 = loc("forward":4294967295:3445)
+#loc364 = loc("forward":4294967295:3446)
+#loc365 = loc("forward":4294967295:3447)
+#loc366 = loc("forward":4294967295:3449)
+#loc367 = loc("forward":4294967295:3450)
+#loc368 = loc("forward":4294967295:3451)
+#loc369 = loc("forward":4294967295:3453)
+#loc370 = loc("forward":4294967295:3455)
+#loc371 = loc("forward":4294967295:3456)
+#loc372 = loc("forward":4294967295:3457)
+#loc373 = loc("forward":4294967295:3458)
+#loc374 = loc("forward":4294967295:3460)
+#loc375 = loc("forward":4294967295:3461)
+#loc376 = loc("forward":4294967295:3462)
+#loc377 = loc("forward":4294967295:3464)
+#loc378 = loc("forward":4294967295:3466)
+#loc379 = loc("forward":4294967295:3467)
+#loc380 = loc("forward":4294967295:3468)
+#loc381 = loc("forward":4294967295:3470)
+#loc382 = loc("forward":4294967295:3471)
+#loc383 = loc("forward":4294967295:3472)
+#loc384 = loc("forward":4294967295:3474)
+#loc385 = loc("forward":4294967295:3476)
+#loc386 = loc("forward":4294967295:3477)
+#loc387 = loc("forward":4294967295:3478)
+#loc388 = loc("forward":4294967295:3479)
+#loc389 = loc("forward":4294967295:3480)
+#loc390 = loc("forward":4294967295:3482)
+#loc391 = loc("forward":4294967295:3483)
+#loc392 = loc("forward":4294967295:3484)
+#loc393 = loc("forward":4294967295:3486)
+#loc394 = loc("forward":4294967295:3488)
+#loc395 = loc("forward":4294967295:3489)
+#loc396 = loc("forward":4294967295:3490)
+#loc397 = loc("forward":4294967295:3491)
+#loc398 = loc("forward":4294967295:3493)
+#loc399 = loc("forward":4294967295:3494)
+#loc400 = loc("forward":4294967295:3495)
+#loc401 = loc("forward":4294967295:3497)
+#loc402 = loc("forward":4294967295:3499)
+#loc403 = loc("forward":4294967295:3500)
+#loc404 = loc("forward":4294967295:3501)
+#loc405 = loc("forward":4294967295:3502)
+#loc406 = loc("forward":4294967295:3504)
+#loc407 = loc("forward":4294967295:3505)
+#loc408 = loc("forward":4294967295:3506)
+#loc409 = loc("forward":4294967295:3508)
+#loc410 = loc("forward":4294967295:3510)
+#loc411 = loc("forward":4294967295:3511)
+#loc412 = loc("forward":4294967295:3512)
+#loc413 = loc("forward":4294967295:3513)
+#loc414 = loc("forward":4294967295:3514)
+#loc415 = loc("forward":4294967295:3516)
+#loc416 = loc("forward":4294967295:3517)
+#loc417 = loc("forward":4294967295:3518)
+#loc418 = loc("forward":4294967295:3520)
+#loc419 = loc("forward":4294967295:3522)
+#loc420 = loc("forward":4294967295:3523)
+#loc421 = loc("forward":4294967295:3524)
+#loc422 = loc("forward":4294967295:3525)
+#loc423 = loc("forward":4294967295:3527)
+#loc424 = loc("forward":4294967295:3528)
+#loc425 = loc("forward":4294967295:3529)
+#loc426 = loc("forward":4294967295:3531)
+#loc427 = loc("forward":4294967295:3533)
+#loc428 = loc("forward":4294967295:3534)
+#loc429 = loc("forward":4294967295:3535)
+#loc430 = loc("forward":4294967295:3536)
+#loc431 = loc("forward":4294967295:3538)
+#loc432 = loc("forward":4294967295:3539)
+#loc433 = loc("forward":4294967295:3540)
+#loc434 = loc("forward":4294967295:3542)
+#loc435 = loc("forward":4294967295:3544)
+#loc436 = loc("forward":4294967295:3545)
+#loc437 = loc("forward":4294967295:3546)
+#loc438 = loc("forward":4294967295:3547)
+#loc439 = loc("forward":4294967295:3548)
+#loc440 = loc("forward":4294967295:3549)
+#loc441 = loc("forward":4294967295:3550)
+#loc442 = loc("forward":4294967295:3551)
+#loc443 = loc("forward":4294967295:3552)
+#loc444 = loc("forward":4294967295:3554)
+#loc445 = loc("forward":4294967295:3556)
+#loc446 = loc(unknown)
+#loc447 = loc("conv2d_0.dc.transpose.0"(#loc1))
+#loc448 = loc("conv2d_0.dc.transpose.1"(#loc2))
+#loc449 = loc("conv2d_0.dc.conv2d.2"(#loc3))
+#loc450 = loc("conv2d_0.dc.transpose.3"(#loc4))
+#loc451 = loc("conv2d_0.dc.transpose.4"(#loc5))
+#loc452 = loc("multiply_8"(#loc6))
+#loc453 = loc("add_14"(#loc7))
+#loc454 = loc("relu_15"(#loc8))
+#loc455 = loc("max_pool2d_16"(#loc9))
+#loc456 = loc("conv2d_17.dc.transpose.0"(#loc10))
+#loc457 = loc("conv2d_17.dc.transpose.1"(#loc11))
+#loc458 = loc("conv2d_17.dc.conv2d.2"(#loc12))
+#loc459 = loc("conv2d_17.dc.transpose.3"(#loc13))
+#loc460 = loc("conv2d_17.dc.transpose.4"(#loc14))
+#loc461 = loc("multiply_25"(#loc15))
+#loc462 = loc("add_31"(#loc16))
+#loc463 = loc("relu_32"(#loc17))
+#loc464 = loc("conv2d_33.dc.transpose.0"(#loc18))
+#loc465 = loc("conv2d_33.dc.transpose.1"(#loc19))
+#loc466 = loc("conv2d_33.dc.conv2d.2"(#loc20))
+#loc467 = loc("conv2d_33.dc.transpose.3"(#loc21))
+#loc468 = loc("conv2d_33.dc.transpose.4"(#loc22))
+#loc469 = loc("multiply_41"(#loc23))
+#loc470 = loc("add_47"(#loc24))
+#loc471 = loc("relu_48"(#loc25))
+#loc472 = loc("conv2d_49.dc.transpose.0"(#loc26))
+#loc473 = loc("conv2d_49.dc.transpose.1"(#loc27))
+#loc474 = loc("conv2d_49.dc.conv2d.2"(#loc28))
+#loc475 = loc("conv2d_49.dc.transpose.3"(#loc29))
+#loc476 = loc("conv2d_49.dc.transpose.4"(#loc30))
+#loc477 = loc("multiply_57"(#loc31))
+#loc478 = loc("add_63"(#loc32))
+#loc479 = loc("conv2d_64.dc.transpose.0"(#loc33))
+#loc480 = loc("conv2d_64.dc.transpose.1"(#loc34))
+#loc481 = loc("conv2d_64.dc.conv2d.2"(#loc35))
+#loc482 = loc("conv2d_64.dc.transpose.3"(#loc36))
+#loc483 = loc("conv2d_64.dc.transpose.4"(#loc37))
+#loc484 = loc("multiply_72"(#loc38))
+#loc485 = loc("add_78"(#loc39))
+#loc486 = loc("add_79"(#loc40))
+#loc487 = loc("relu_80"(#loc41))
+#loc488 = loc("conv2d_81.dc.transpose.0"(#loc42))
+#loc489 = loc("conv2d_81.dc.transpose.1"(#loc43))
+#loc490 = loc("conv2d_81.dc.conv2d.2"(#loc44))
+#loc491 = loc("conv2d_81.dc.transpose.3"(#loc45))
+#loc492 = loc("conv2d_81.dc.transpose.4"(#loc46))
+#loc493 = loc("multiply_89"(#loc47))
+#loc494 = loc("add_95"(#loc48))
+#loc495 = loc("relu_96"(#loc49))
+#loc496 = loc("conv2d_97.dc.transpose.0"(#loc50))
+#loc497 = loc("conv2d_97.dc.transpose.1"(#loc51))
+#loc498 = loc("conv2d_97.dc.conv2d.2"(#loc52))
+#loc499 = loc("conv2d_97.dc.transpose.3"(#loc53))
+#loc500 = loc("conv2d_97.dc.transpose.4"(#loc54))
+#loc501 = loc("multiply_105"(#loc55))
+#loc502 = loc("add_111"(#loc56))
+#loc503 = loc("relu_112"(#loc57))
+#loc504 = loc("conv2d_113.dc.transpose.0"(#loc58))
+#loc505 = loc("conv2d_113.dc.transpose.1"(#loc59))
+#loc506 = loc("conv2d_113.dc.conv2d.2"(#loc60))
+#loc507 = loc("conv2d_113.dc.transpose.3"(#loc61))
+#loc508 = loc("conv2d_113.dc.transpose.4"(#loc62))
+#loc509 = loc("multiply_121"(#loc63))
+#loc510 = loc("add_127"(#loc64))
+#loc511 = loc("add_128"(#loc65))
+#loc512 = loc("relu_129"(#loc66))
+#loc513 = loc("conv2d_130.dc.transpose.0"(#loc67))
+#loc514 = loc("conv2d_130.dc.transpose.1"(#loc68))
+#loc515 = loc("conv2d_130.dc.conv2d.2"(#loc69))
+#loc516 = loc("conv2d_130.dc.transpose.3"(#loc70))
+#loc517 = loc("conv2d_130.dc.transpose.4"(#loc71))
+#loc518 = loc("multiply_138"(#loc72))
+#loc519 = loc("add_144"(#loc73))
+#loc520 = loc("relu_145"(#loc74))
+#loc521 = loc("conv2d_146.dc.transpose.0"(#loc75))
+#loc522 = loc("conv2d_146.dc.transpose.1"(#loc76))
+#loc523 = loc("conv2d_146.dc.conv2d.2"(#loc77))
+#loc524 = loc("conv2d_146.dc.transpose.3"(#loc78))
+#loc525 = loc("conv2d_146.dc.transpose.4"(#loc79))
+#loc526 = loc("multiply_154"(#loc80))
+#loc527 = loc("add_160"(#loc81))
+#loc528 = loc("relu_161"(#loc82))
+#loc529 = loc("conv2d_162.dc.transpose.0"(#loc83))
+#loc530 = loc("conv2d_162.dc.transpose.1"(#loc84))
+#loc531 = loc("conv2d_162.dc.conv2d.2"(#loc85))
+#loc532 = loc("conv2d_162.dc.transpose.3"(#loc86))
+#loc533 = loc("conv2d_162.dc.transpose.4"(#loc87))
+#loc534 = loc("multiply_170"(#loc88))
+#loc535 = loc("add_176"(#loc89))
+#loc536 = loc("add_177"(#loc90))
+#loc537 = loc("relu_178"(#loc91))
+#loc538 = loc("conv2d_179.dc.transpose.0"(#loc92))
+#loc539 = loc("conv2d_179.dc.transpose.1"(#loc93))
+#loc540 = loc("conv2d_179.dc.conv2d.2"(#loc94))
+#loc541 = loc("conv2d_179.dc.transpose.3"(#loc95))
+#loc542 = loc("conv2d_179.dc.transpose.4"(#loc96))
+#loc543 = loc("multiply_187"(#loc97))
+#loc544 = loc("add_193"(#loc98))
+#loc545 = loc("relu_194"(#loc99))
+#loc546 = loc("conv2d_195.dc.transpose.0"(#loc100))
+#loc547 = loc("conv2d_195.dc.transpose.1"(#loc101))
+#loc548 = loc("conv2d_195.dc.conv2d.2"(#loc102))
+#loc549 = loc("conv2d_195.dc.transpose.3"(#loc103))
+#loc550 = loc("conv2d_195.dc.transpose.4"(#loc104))
+#loc551 = loc("multiply_203"(#loc105))
+#loc552 = loc("add_209"(#loc106))
+#loc553 = loc("relu_210"(#loc107))
+#loc554 = loc("conv2d_211.dc.transpose.0"(#loc108))
+#loc555 = loc("conv2d_211.dc.transpose.1"(#loc109))
+#loc556 = loc("conv2d_211.dc.conv2d.2"(#loc110))
+#loc557 = loc("conv2d_211.dc.transpose.3"(#loc111))
+#loc558 = loc("conv2d_211.dc.transpose.4"(#loc112))
+#loc559 = loc("multiply_219"(#loc113))
+#loc560 = loc("add_225"(#loc114))
+#loc561 = loc("conv2d_226.dc.transpose.0"(#loc115))
+#loc562 = loc("conv2d_226.dc.transpose.1"(#loc116))
+#loc563 = loc("conv2d_226.dc.conv2d.2"(#loc117))
+#loc564 = loc("conv2d_226.dc.transpose.3"(#loc118))
+#loc565 = loc("conv2d_226.dc.transpose.4"(#loc119))
+#loc566 = loc("multiply_234"(#loc120))
+#loc567 = loc("add_240"(#loc121))
+#loc568 = loc("add_241"(#loc122))
+#loc569 = loc("relu_242"(#loc123))
+#loc570 = loc("conv2d_243.dc.transpose.0"(#loc124))
+#loc571 = loc("conv2d_243.dc.transpose.1"(#loc125))
+#loc572 = loc("conv2d_243.dc.conv2d.2"(#loc126))
+#loc573 = loc("conv2d_243.dc.transpose.3"(#loc127))
+#loc574 = loc("conv2d_243.dc.transpose.4"(#loc128))
+#loc575 = loc("multiply_251"(#loc129))
+#loc576 = loc("add_257"(#loc130))
+#loc577 = loc("relu_258"(#loc131))
+#loc578 = loc("conv2d_259.dc.transpose.0"(#loc132))
+#loc579 = loc("conv2d_259.dc.transpose.1"(#loc133))
+#loc580 = loc("conv2d_259.dc.conv2d.2"(#loc134))
+#loc581 = loc("conv2d_259.dc.transpose.3"(#loc135))
+#loc582 = loc("conv2d_259.dc.transpose.4"(#loc136))
+#loc583 = loc("multiply_267"(#loc137))
+#loc584 = loc("add_273"(#loc138))
+#loc585 = loc("relu_274"(#loc139))
+#loc586 = loc("conv2d_275.dc.transpose.0"(#loc140))
+#loc587 = loc("conv2d_275.dc.transpose.1"(#loc141))
+#loc588 = loc("conv2d_275.dc.conv2d.2"(#loc142))
+#loc589 = loc("conv2d_275.dc.transpose.3"(#loc143))
+#loc590 = loc("conv2d_275.dc.transpose.4"(#loc144))
+#loc591 = loc("multiply_283"(#loc145))
+#loc592 = loc("add_289"(#loc146))
+#loc593 = loc("add_290"(#loc147))
+#loc594 = loc("relu_291"(#loc148))
+#loc595 = loc("conv2d_292.dc.transpose.0"(#loc149))
+#loc596 = loc("conv2d_292.dc.transpose.1"(#loc150))
+#loc597 = loc("conv2d_292.dc.conv2d.2"(#loc151))
+#loc598 = loc("conv2d_292.dc.transpose.3"(#loc152))
+#loc599 = loc("conv2d_292.dc.transpose.4"(#loc153))
+#loc600 = loc("multiply_300"(#loc154))
+#loc601 = loc("add_306"(#loc155))
+#loc602 = loc("relu_307"(#loc156))
+#loc603 = loc("conv2d_308.dc.transpose.0"(#loc157))
+#loc604 = loc("conv2d_308.dc.transpose.1"(#loc158))
+#loc605 = loc("conv2d_308.dc.conv2d.2"(#loc159))
+#loc606 = loc("conv2d_308.dc.transpose.3"(#loc160))
+#loc607 = loc("conv2d_308.dc.transpose.4"(#loc161))
+#loc608 = loc("multiply_316"(#loc162))
+#loc609 = loc("add_322"(#loc163))
+#loc610 = loc("relu_323"(#loc164))
+#loc611 = loc("conv2d_324.dc.transpose.0"(#loc165))
+#loc612 = loc("conv2d_324.dc.transpose.1"(#loc166))
+#loc613 = loc("conv2d_324.dc.conv2d.2"(#loc167))
+#loc614 = loc("conv2d_324.dc.transpose.3"(#loc168))
+#loc615 = loc("conv2d_324.dc.transpose.4"(#loc169))
+#loc616 = loc("multiply_332"(#loc170))
+#loc617 = loc("add_338"(#loc171))
+#loc618 = loc("add_339"(#loc172))
+#loc619 = loc("relu_340"(#loc173))
+#loc620 = loc("conv2d_341.dc.transpose.0"(#loc174))
+#loc621 = loc("conv2d_341.dc.transpose.1"(#loc175))
+#loc622 = loc("conv2d_341.dc.conv2d.2"(#loc176))
+#loc623 = loc("conv2d_341.dc.transpose.3"(#loc177))
+#loc624 = loc("conv2d_341.dc.transpose.4"(#loc178))
+#loc625 = loc("multiply_349"(#loc179))
+#loc626 = loc("add_355"(#loc180))
+#loc627 = loc("relu_356"(#loc181))
+#loc628 = loc("conv2d_357.dc.transpose.0"(#loc182))
+#loc629 = loc("conv2d_357.dc.transpose.1"(#loc183))
+#loc630 = loc("conv2d_357.dc.conv2d.2"(#loc184))
+#loc631 = loc("conv2d_357.dc.transpose.3"(#loc185))
+#loc632 = loc("conv2d_357.dc.transpose.4"(#loc186))
+#loc633 = loc("multiply_365"(#loc187))
+#loc634 = loc("add_371"(#loc188))
+#loc635 = loc("relu_372"(#loc189))
+#loc636 = loc("conv2d_373.dc.transpose.0"(#loc190))
+#loc637 = loc("conv2d_373.dc.transpose.1"(#loc191))
+#loc638 = loc("conv2d_373.dc.conv2d.2"(#loc192))
+#loc639 = loc("conv2d_373.dc.transpose.3"(#loc193))
+#loc640 = loc("conv2d_373.dc.transpose.4"(#loc194))
+#loc641 = loc("multiply_381"(#loc195))
+#loc642 = loc("add_387"(#loc196))
+#loc643 = loc("add_388"(#loc197))
+#loc644 = loc("relu_389"(#loc198))
+#loc645 = loc("conv2d_390.dc.transpose.0"(#loc199))
+#loc646 = loc("conv2d_390.dc.transpose.1"(#loc200))
+#loc647 = loc("conv2d_390.dc.conv2d.2"(#loc201))
+#loc648 = loc("conv2d_390.dc.transpose.3"(#loc202))
+#loc649 = loc("conv2d_390.dc.transpose.4"(#loc203))
+#loc650 = loc("multiply_398"(#loc204))
+#loc651 = loc("add_404"(#loc205))
+#loc652 = loc("relu_405"(#loc206))
+#loc653 = loc("conv2d_406.dc.transpose.0"(#loc207))
+#loc654 = loc("conv2d_406.dc.transpose.1"(#loc208))
+#loc655 = loc("conv2d_406.dc.conv2d.2"(#loc209))
+#loc656 = loc("conv2d_406.dc.transpose.3"(#loc210))
+#loc657 = loc("conv2d_406.dc.transpose.4"(#loc211))
+#loc658 = loc("multiply_414"(#loc212))
+#loc659 = loc("add_420"(#loc213))
+#loc660 = loc("relu_421"(#loc214))
+#loc661 = loc("conv2d_422.dc.transpose.0"(#loc215))
+#loc662 = loc("conv2d_422.dc.transpose.1"(#loc216))
+#loc663 = loc("conv2d_422.dc.conv2d.2"(#loc217))
+#loc664 = loc("conv2d_422.dc.transpose.3"(#loc218))
+#loc665 = loc("conv2d_422.dc.transpose.4"(#loc219))
+#loc666 = loc("multiply_430"(#loc220))
+#loc667 = loc("add_436"(#loc221))
+#loc668 = loc("conv2d_437.dc.transpose.0"(#loc222))
+#loc669 = loc("conv2d_437.dc.transpose.1"(#loc223))
+#loc670 = loc("conv2d_437.dc.conv2d.2"(#loc224))
+#loc671 = loc("conv2d_437.dc.transpose.3"(#loc225))
+#loc672 = loc("conv2d_437.dc.transpose.4"(#loc226))
+#loc673 = loc("multiply_445"(#loc227))
+#loc674 = loc("add_451"(#loc228))
+#loc675 = loc("add_452"(#loc229))
+#loc676 = loc("relu_453"(#loc230))
+#loc677 = loc("conv2d_454.dc.transpose.0"(#loc231))
+#loc678 = loc("conv2d_454.dc.transpose.1"(#loc232))
+#loc679 = loc("conv2d_454.dc.conv2d.2"(#loc233))
+#loc680 = loc("conv2d_454.dc.transpose.3"(#loc234))
+#loc681 = loc("conv2d_454.dc.transpose.4"(#loc235))
+#loc682 = loc("multiply_462"(#loc236))
+#loc683 = loc("add_468"(#loc237))
+#loc684 = loc("relu_469"(#loc238))
+#loc685 = loc("conv2d_470.dc.transpose.0"(#loc239))
+#loc686 = loc("conv2d_470.dc.transpose.1"(#loc240))
+#loc687 = loc("conv2d_470.dc.conv2d.2"(#loc241))
+#loc688 = loc("conv2d_470.dc.transpose.3"(#loc242))
+#loc689 = loc("conv2d_470.dc.transpose.4"(#loc243))
+#loc690 = loc("multiply_478"(#loc244))
+#loc691 = loc("add_484"(#loc245))
+#loc692 = loc("relu_485"(#loc246))
+#loc693 = loc("conv2d_486.dc.transpose.0"(#loc247))
+#loc694 = loc("conv2d_486.dc.transpose.1"(#loc248))
+#loc695 = loc("conv2d_486.dc.conv2d.2"(#loc249))
+#loc696 = loc("conv2d_486.dc.transpose.3"(#loc250))
+#loc697 = loc("conv2d_486.dc.transpose.4"(#loc251))
+#loc698 = loc("multiply_494"(#loc252))
+#loc699 = loc("add_500"(#loc253))
+#loc700 = loc("add_501"(#loc254))
+#loc701 = loc("relu_502"(#loc255))
+#loc702 = loc("conv2d_503.dc.transpose.0"(#loc256))
+#loc703 = loc("conv2d_503.dc.transpose.1"(#loc257))
+#loc704 = loc("conv2d_503.dc.conv2d.2"(#loc258))
+#loc705 = loc("conv2d_503.dc.transpose.3"(#loc259))
+#loc706 = loc("conv2d_503.dc.transpose.4"(#loc260))
+#loc707 = loc("multiply_511"(#loc261))
+#loc708 = loc("add_517"(#loc262))
+#loc709 = loc("relu_518"(#loc263))
+#loc710 = loc("conv2d_519.dc.transpose.0"(#loc264))
+#loc711 = loc("conv2d_519.dc.transpose.1"(#loc265))
+#loc712 = loc("conv2d_519.dc.conv2d.2"(#loc266))
+#loc713 = loc("conv2d_519.dc.transpose.3"(#loc267))
+#loc714 = loc("conv2d_519.dc.transpose.4"(#loc268))
+#loc715 = loc("multiply_527"(#loc269))
+#loc716 = loc("add_533"(#loc270))
+#loc717 = loc("relu_534"(#loc271))
+#loc718 = loc("conv2d_535.dc.transpose.0"(#loc272))
+#loc719 = loc("conv2d_535.dc.transpose.1"(#loc273))
+#loc720 = loc("conv2d_535.dc.conv2d.2"(#loc274))
+#loc721 = loc("conv2d_535.dc.transpose.3"(#loc275))
+#loc722 = loc("conv2d_535.dc.transpose.4"(#loc276))
+#loc723 = loc("multiply_543"(#loc277))
+#loc724 = loc("add_549"(#loc278))
+#loc725 = loc("add_550"(#loc279))
+#loc726 = loc("relu_551"(#loc280))
+#loc727 = loc("conv2d_552.dc.transpose.0"(#loc281))
+#loc728 = loc("conv2d_552.dc.transpose.1"(#loc282))
+#loc729 = loc("conv2d_552.dc.conv2d.2"(#loc283))
+#loc730 = loc("conv2d_552.dc.transpose.3"(#loc284))
+#loc731 = loc("conv2d_552.dc.transpose.4"(#loc285))
+#loc732 = loc("multiply_560"(#loc286))
+#loc733 = loc("add_566"(#loc287))
+#loc734 = loc("relu_567"(#loc288))
+#loc735 = loc("conv2d_568.dc.transpose.0"(#loc289))
+#loc736 = loc("conv2d_568.dc.transpose.1"(#loc290))
+#loc737 = loc("conv2d_568.dc.conv2d.2"(#loc291))
+#loc738 = loc("conv2d_568.dc.transpose.3"(#loc292))
+#loc739 = loc("conv2d_568.dc.transpose.4"(#loc293))
+#loc740 = loc("multiply_576"(#loc294))
+#loc741 = loc("add_582"(#loc295))
+#loc742 = loc("relu_583"(#loc296))
+#loc743 = loc("conv2d_584.dc.transpose.0"(#loc297))
+#loc744 = loc("conv2d_584.dc.transpose.1"(#loc298))
+#loc745 = loc("conv2d_584.dc.conv2d.2"(#loc299))
+#loc746 = loc("conv2d_584.dc.transpose.3"(#loc300))
+#loc747 = loc("conv2d_584.dc.transpose.4"(#loc301))
+#loc748 = loc("multiply_592"(#loc302))
+#loc749 = loc("add_598"(#loc303))
+#loc750 = loc("add_599"(#loc304))
+#loc751 = loc("relu_600"(#loc305))
+#loc752 = loc("conv2d_601.dc.transpose.0"(#loc306))
+#loc753 = loc("conv2d_601.dc.transpose.1"(#loc307))
+#loc754 = loc("conv2d_601.dc.conv2d.2"(#loc308))
+#loc755 = loc("conv2d_601.dc.transpose.3"(#loc309))
+#loc756 = loc("conv2d_601.dc.transpose.4"(#loc310))
+#loc757 = loc("multiply_609"(#loc311))
+#loc758 = loc("add_615"(#loc312))
+#loc759 = loc("relu_616"(#loc313))
+#loc760 = loc("conv2d_617.dc.transpose.0"(#loc314))
+#loc761 = loc("conv2d_617.dc.transpose.1"(#loc315))
+#loc762 = loc("conv2d_617.dc.conv2d.2"(#loc316))
+#loc763 = loc("conv2d_617.dc.transpose.3"(#loc317))
+#loc764 = loc("conv2d_617.dc.transpose.4"(#loc318))
+#loc765 = loc("multiply_625"(#loc319))
+#loc766 = loc("add_631"(#loc320))
+#loc767 = loc("relu_632"(#loc321))
+#loc768 = loc("conv2d_633.dc.transpose.0"(#loc322))
+#loc769 = loc("conv2d_633.dc.transpose.1"(#loc323))
+#loc770 = loc("conv2d_633.dc.conv2d.2"(#loc324))
+#loc771 = loc("conv2d_633.dc.transpose.3"(#loc325))
+#loc772 = loc("conv2d_633.dc.transpose.4"(#loc326))
+#loc773 = loc("multiply_641"(#loc327))
+#loc774 = loc("add_647"(#loc328))
+#loc775 = loc("add_648"(#loc329))
+#loc776 = loc("relu_649"(#loc330))
+#loc777 = loc("conv2d_650.dc.transpose.0"(#loc331))
+#loc778 = loc("conv2d_650.dc.transpose.1"(#loc332))
+#loc779 = loc("conv2d_650.dc.conv2d.2"(#loc333))
+#loc780 = loc("conv2d_650.dc.transpose.3"(#loc334))
+#loc781 = loc("conv2d_650.dc.transpose.4"(#loc335))
+#loc782 = loc("multiply_658"(#loc336))
+#loc783 = loc("add_664"(#loc337))
+#loc784 = loc("relu_665"(#loc338))
+#loc785 = loc("conv2d_666.dc.transpose.0"(#loc339))
+#loc786 = loc("conv2d_666.dc.transpose.1"(#loc340))
+#loc787 = loc("conv2d_666.dc.conv2d.2"(#loc341))
+#loc788 = loc("conv2d_666.dc.transpose.3"(#loc342))
+#loc789 = loc("conv2d_666.dc.transpose.4"(#loc343))
+#loc790 = loc("multiply_674"(#loc344))
+#loc791 = loc("add_680"(#loc345))
+#loc792 = loc("relu_681"(#loc346))
+#loc793 = loc("conv2d_682.dc.transpose.0"(#loc347))
+#loc794 = loc("conv2d_682.dc.transpose.1"(#loc348))
+#loc795 = loc("conv2d_682.dc.conv2d.2"(#loc349))
+#loc796 = loc("conv2d_682.dc.transpose.3"(#loc350))
+#loc797 = loc("conv2d_682.dc.transpose.4"(#loc351))
+#loc798 = loc("multiply_690"(#loc352))
+#loc799 = loc("add_696"(#loc353))
+#loc800 = loc("add_697"(#loc354))
+#loc801 = loc("relu_698"(#loc355))
+#loc802 = loc("conv2d_699.dc.transpose.0"(#loc356))
+#loc803 = loc("conv2d_699.dc.transpose.1"(#loc357))
+#loc804 = loc("conv2d_699.dc.conv2d.2"(#loc358))
+#loc805 = loc("conv2d_699.dc.transpose.3"(#loc359))
+#loc806 = loc("conv2d_699.dc.transpose.4"(#loc360))
+#loc807 = loc("multiply_707"(#loc361))
+#loc808 = loc("add_713"(#loc362))
+#loc809 = loc("relu_714"(#loc363))
+#loc810 = loc("conv2d_715.dc.transpose.0"(#loc364))
+#loc811 = loc("conv2d_715.dc.transpose.1"(#loc365))
+#loc812 = loc("conv2d_715.dc.conv2d.2"(#loc366))
+#loc813 = loc("conv2d_715.dc.transpose.3"(#loc367))
+#loc814 = loc("conv2d_715.dc.transpose.4"(#loc368))
+#loc815 = loc("multiply_723"(#loc369))
+#loc816 = loc("add_729"(#loc370))
+#loc817 = loc("relu_730"(#loc371))
+#loc818 = loc("conv2d_731.dc.transpose.0"(#loc372))
+#loc819 = loc("conv2d_731.dc.transpose.1"(#loc373))
+#loc820 = loc("conv2d_731.dc.conv2d.2"(#loc374))
+#loc821 = loc("conv2d_731.dc.transpose.3"(#loc375))
+#loc822 = loc("conv2d_731.dc.transpose.4"(#loc376))
+#loc823 = loc("multiply_739"(#loc377))
+#loc824 = loc("add_745"(#loc378))
+#loc825 = loc("conv2d_746.dc.transpose.0"(#loc379))
+#loc826 = loc("conv2d_746.dc.transpose.1"(#loc380))
+#loc827 = loc("conv2d_746.dc.conv2d.2"(#loc381))
+#loc828 = loc("conv2d_746.dc.transpose.3"(#loc382))
+#loc829 = loc("conv2d_746.dc.transpose.4"(#loc383))
+#loc830 = loc("multiply_754"(#loc384))
+#loc831 = loc("add_760"(#loc385))
+#loc832 = loc("add_761"(#loc386))
+#loc833 = loc("relu_762"(#loc387))
+#loc834 = loc("conv2d_763.dc.transpose.0"(#loc388))
+#loc835 = loc("conv2d_763.dc.transpose.1"(#loc389))
+#loc836 = loc("conv2d_763.dc.conv2d.2"(#loc390))
+#loc837 = loc("conv2d_763.dc.transpose.3"(#loc391))
+#loc838 = loc("conv2d_763.dc.transpose.4"(#loc392))
+#loc839 = loc("multiply_771"(#loc393))
+#loc840 = loc("add_777"(#loc394))
+#loc841 = loc("relu_778"(#loc395))
+#loc842 = loc("conv2d_779.dc.transpose.0"(#loc396))
+#loc843 = loc("conv2d_779.dc.transpose.1"(#loc397))
+#loc844 = loc("conv2d_779.dc.conv2d.2"(#loc398))
+#loc845 = loc("conv2d_779.dc.transpose.3"(#loc399))
+#loc846 = loc("conv2d_779.dc.transpose.4"(#loc400))
+#loc847 = loc("multiply_787"(#loc401))
+#loc848 = loc("add_793"(#loc402))
+#loc849 = loc("relu_794"(#loc403))
+#loc850 = loc("conv2d_795.dc.transpose.0"(#loc404))
+#loc851 = loc("conv2d_795.dc.transpose.1"(#loc405))
+#loc852 = loc("conv2d_795.dc.conv2d.2"(#loc406))
+#loc853 = loc("conv2d_795.dc.transpose.3"(#loc407))
+#loc854 = loc("conv2d_795.dc.transpose.4"(#loc408))
+#loc855 = loc("multiply_803"(#loc409))
+#loc856 = loc("add_809"(#loc410))
+#loc857 = loc("add_810"(#loc411))
+#loc858 = loc("relu_811"(#loc412))
+#loc859 = loc("conv2d_812.dc.transpose.0"(#loc413))
+#loc860 = loc("conv2d_812.dc.transpose.1"(#loc414))
+#loc861 = loc("conv2d_812.dc.conv2d.2"(#loc415))
+#loc862 = loc("conv2d_812.dc.transpose.3"(#loc416))
+#loc863 = loc("conv2d_812.dc.transpose.4"(#loc417))
+#loc864 = loc("multiply_820"(#loc418))
+#loc865 = loc("add_826"(#loc419))
+#loc866 = loc("relu_827"(#loc420))
+#loc867 = loc("conv2d_828.dc.transpose.0"(#loc421))
+#loc868 = loc("conv2d_828.dc.transpose.1"(#loc422))
+#loc869 = loc("conv2d_828.dc.conv2d.2"(#loc423))
+#loc870 = loc("conv2d_828.dc.transpose.3"(#loc424))
+#loc871 = loc("conv2d_828.dc.transpose.4"(#loc425))
+#loc872 = loc("multiply_836"(#loc426))
+#loc873 = loc("add_842"(#loc427))
+#loc874 = loc("relu_843"(#loc428))
+#loc875 = loc("conv2d_844.dc.transpose.0"(#loc429))
+#loc876 = loc("conv2d_844.dc.transpose.1"(#loc430))
+#loc877 = loc("conv2d_844.dc.conv2d.2"(#loc431))
+#loc878 = loc("conv2d_844.dc.transpose.3"(#loc432))
+#loc879 = loc("conv2d_844.dc.transpose.4"(#loc433))
+#loc880 = loc("multiply_852"(#loc434))
+#loc881 = loc("add_858"(#loc435))
+#loc882 = loc("add_859"(#loc436))
+#loc883 = loc("relu_860"(#loc437))
+#loc884 = loc("avg_pool2d_861.dc.reshape.0"(#loc438))
+#loc885 = loc("avg_pool2d_861.dc.transpose.1.dc.transpose.0"(#loc439))
+#loc886 = loc("avg_pool2d_861.dc.reduce_avg.2"(#loc440))
+#loc887 = loc("avg_pool2d_861.dc.reshape.4"(#loc441))
+#loc888 = loc("squeeze_863"(#loc442))
+#loc889 = loc("squeeze_864"(#loc443))
+#loc890 = loc("matmul_866"(#loc444))
+#loc891 = loc("add_867"(#loc445))
diff --git a/tools/explorer/test/models/test_10k_ops.mlir b/tools/explorer/test/models/test_10k_ops.mlir
new file mode 100644
index 000000000..0c94560df
--- /dev/null
+++ b/tools/explorer/test/models/test_10k_ops.mlir
@@ -0,0 +1,10005 @@
+module @Test10k attributes {} {
+  func.func @forward(%arg0: tensor<1x64xf32> {ttir.name = "input_1"}) -> (tensor<1x64xf32> {ttir.name = "TEST10k"}) {
+    %0 = tensor.empty() : tensor<1x64xf32>
+    %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2 = tensor.empty() : tensor<1x64xf32>
+    %3 = "ttir.relu"(%1, %2) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4 = tensor.empty() : tensor<1x64xf32>
+    %5 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6 = tensor.empty() : tensor<1x64xf32>
+    %7 = "ttir.relu"(%5, %6) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8 = tensor.empty() : tensor<1x64xf32>
+    %9 = "ttir.relu"(%7, %8) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %10 = tensor.empty() : tensor<1x64xf32>
+    %11 = "ttir.relu"(%9, %10) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %12 = tensor.empty() : tensor<1x64xf32>
+    %13 = "ttir.relu"(%11, %12) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %14 = tensor.empty() : tensor<1x64xf32>
+    %15 = "ttir.relu"(%13, %14) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %16 = tensor.empty() : tensor<1x64xf32>
+    %17 = "ttir.relu"(%15, %16) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %18 = tensor.empty() : tensor<1x64xf32>
+    %19 = "ttir.relu"(%17, %18) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %20 = tensor.empty() : tensor<1x64xf32>
+    %21 = "ttir.relu"(%19, %20) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %22 = tensor.empty() : tensor<1x64xf32>
+    %23 = "ttir.relu"(%21, %22) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %24 = tensor.empty() : tensor<1x64xf32>
+    %25 = "ttir.relu"(%23, %24) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %26 = tensor.empty() : tensor<1x64xf32>
+    %27 = "ttir.relu"(%25, %26) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %28 = tensor.empty() : tensor<1x64xf32>
+    %29 = "ttir.relu"(%27, %28) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %30 = tensor.empty() : tensor<1x64xf32>
+    %31 = "ttir.relu"(%29, %30) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %32 = tensor.empty() : tensor<1x64xf32>
+    %33 = "ttir.relu"(%31, %32) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %34 = tensor.empty() : tensor<1x64xf32>
+    %35 = "ttir.relu"(%33, %34) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %36 = tensor.empty() : tensor<1x64xf32>
+    %37 = "ttir.relu"(%35, %36) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %38 = tensor.empty() : tensor<1x64xf32>
+    %39 = "ttir.relu"(%37, %38) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %40 = tensor.empty() : tensor<1x64xf32>
+    %41 = "ttir.relu"(%39, %40) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %42 = tensor.empty() : tensor<1x64xf32>
+    %43 = "ttir.relu"(%41, %42) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %44 = tensor.empty() : tensor<1x64xf32>
+    %45 = "ttir.relu"(%43, %44) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %46 = tensor.empty() : tensor<1x64xf32>
+    %47 = "ttir.relu"(%45, %46) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %48 = tensor.empty() : tensor<1x64xf32>
+    %49 = "ttir.relu"(%47, %48) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %50 = tensor.empty() : tensor<1x64xf32>
+    %51 = "ttir.relu"(%49, %50) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %52 = tensor.empty() : tensor<1x64xf32>
+    %53 = "ttir.relu"(%51, %52) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %54 = tensor.empty() : tensor<1x64xf32>
+    %55 = "ttir.relu"(%53, %54) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %56 = tensor.empty() : tensor<1x64xf32>
+    %57 = "ttir.relu"(%55, %56) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %58 = tensor.empty() : tensor<1x64xf32>
+    %59 = "ttir.relu"(%57, %58) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %60 = tensor.empty() : tensor<1x64xf32>
+    %61 = "ttir.relu"(%59, %60) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %62 = tensor.empty() : tensor<1x64xf32>
+    %63 = "ttir.relu"(%61, %62) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %64 = tensor.empty() : tensor<1x64xf32>
+    %65 = "ttir.relu"(%63, %64) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %66 = tensor.empty() : tensor<1x64xf32>
+    %67 = "ttir.relu"(%65, %66) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %68 = tensor.empty() : tensor<1x64xf32>
+    %69 = "ttir.relu"(%67, %68) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %70 = tensor.empty() : tensor<1x64xf32>
+    %71 = "ttir.relu"(%69, %70) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %72 = tensor.empty() : tensor<1x64xf32>
+    %73 = "ttir.relu"(%71, %72) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %74 = tensor.empty() : tensor<1x64xf32>
+    %75 = "ttir.relu"(%73, %74) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %76 = tensor.empty() : tensor<1x64xf32>
+    %77 = "ttir.relu"(%75, %76) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %78 = tensor.empty() : tensor<1x64xf32>
+    %79 = "ttir.relu"(%77, %78) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %80 = tensor.empty() : tensor<1x64xf32>
+    %81 = "ttir.relu"(%79, %80) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %82 = tensor.empty() : tensor<1x64xf32>
+    %83 = "ttir.relu"(%81, %82) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %84 = tensor.empty() : tensor<1x64xf32>
+    %85 = "ttir.relu"(%83, %84) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %86 = tensor.empty() : tensor<1x64xf32>
+    %87 = "ttir.relu"(%85, %86) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %88 = tensor.empty() : tensor<1x64xf32>
+    %89 = "ttir.relu"(%87, %88) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %90 = tensor.empty() : tensor<1x64xf32>
+    %91 = "ttir.relu"(%89, %90) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %92 = tensor.empty() : tensor<1x64xf32>
+    %93 = "ttir.relu"(%91, %92) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %94 = tensor.empty() : tensor<1x64xf32>
+    %95 = "ttir.relu"(%93, %94) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %96 = tensor.empty() : tensor<1x64xf32>
+    %97 = "ttir.relu"(%95, %96) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %98 = tensor.empty() : tensor<1x64xf32>
+    %99 = "ttir.relu"(%97, %98) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %100 = tensor.empty() : tensor<1x64xf32>
+    %101 = "ttir.relu"(%99, %100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %102 = tensor.empty() : tensor<1x64xf32>
+    %103 = "ttir.relu"(%101, %102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %104 = tensor.empty() : tensor<1x64xf32>
+    %105 = "ttir.relu"(%103, %104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %106 = tensor.empty() : tensor<1x64xf32>
+    %107 = "ttir.relu"(%105, %106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %108 = tensor.empty() : tensor<1x64xf32>
+    %109 = "ttir.relu"(%107, %108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %110 = tensor.empty() : tensor<1x64xf32>
+    %111 = "ttir.relu"(%109, %110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %112 = tensor.empty() : tensor<1x64xf32>
+    %113 = "ttir.relu"(%111, %112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %114 = tensor.empty() : tensor<1x64xf32>
+    %115 = "ttir.relu"(%113, %114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %116 = tensor.empty() : tensor<1x64xf32>
+    %117 = "ttir.relu"(%115, %116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %118 = tensor.empty() : tensor<1x64xf32>
+    %119 = "ttir.relu"(%117, %118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %120 = tensor.empty() : tensor<1x64xf32>
+    %121 = "ttir.relu"(%119, %120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %122 = tensor.empty() : tensor<1x64xf32>
+    %123 = "ttir.relu"(%121, %122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %124 = tensor.empty() : tensor<1x64xf32>
+    %125 = "ttir.relu"(%123, %124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %126 = tensor.empty() : tensor<1x64xf32>
+    %127 = "ttir.relu"(%125, %126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %128 = tensor.empty() : tensor<1x64xf32>
+    %129 = "ttir.relu"(%127, %128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %130 = tensor.empty() : tensor<1x64xf32>
+    %131 = "ttir.relu"(%129, %130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %132 = tensor.empty() : tensor<1x64xf32>
+    %133 = "ttir.relu"(%131, %132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %134 = tensor.empty() : tensor<1x64xf32>
+    %135 = "ttir.relu"(%133, %134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %136 = tensor.empty() : tensor<1x64xf32>
+    %137 = "ttir.relu"(%135, %136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %138 = tensor.empty() : tensor<1x64xf32>
+    %139 = "ttir.relu"(%137, %138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %140 = tensor.empty() : tensor<1x64xf32>
+    %141 = "ttir.relu"(%139, %140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %142 = tensor.empty() : tensor<1x64xf32>
+    %143 = "ttir.relu"(%141, %142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %144 = tensor.empty() : tensor<1x64xf32>
+    %145 = "ttir.relu"(%143, %144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %146 = tensor.empty() : tensor<1x64xf32>
+    %147 = "ttir.relu"(%145, %146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %148 = tensor.empty() : tensor<1x64xf32>
+    %149 = "ttir.relu"(%147, %148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %150 = tensor.empty() : tensor<1x64xf32>
+    %151 = "ttir.relu"(%149, %150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %152 = tensor.empty() : tensor<1x64xf32>
+    %153 = "ttir.relu"(%151, %152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %154 = tensor.empty() : tensor<1x64xf32>
+    %155 = "ttir.relu"(%153, %154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %156 = tensor.empty() : tensor<1x64xf32>
+    %157 = "ttir.relu"(%155, %156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %158 = tensor.empty() : tensor<1x64xf32>
+    %159 = "ttir.relu"(%157, %158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %160 = tensor.empty() : tensor<1x64xf32>
+    %161 = "ttir.relu"(%159, %160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %162 = tensor.empty() : tensor<1x64xf32>
+    %163 = "ttir.relu"(%161, %162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %164 = tensor.empty() : tensor<1x64xf32>
+    %165 = "ttir.relu"(%163, %164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %166 = tensor.empty() : tensor<1x64xf32>
+    %167 = "ttir.relu"(%165, %166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %168 = tensor.empty() : tensor<1x64xf32>
+    %169 = "ttir.relu"(%167, %168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %170 = tensor.empty() : tensor<1x64xf32>
+    %171 = "ttir.relu"(%169, %170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %172 = tensor.empty() : tensor<1x64xf32>
+    %173 = "ttir.relu"(%171, %172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %174 = tensor.empty() : tensor<1x64xf32>
+    %175 = "ttir.relu"(%173, %174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %176 = tensor.empty() : tensor<1x64xf32>
+    %177 = "ttir.relu"(%175, %176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %178 = tensor.empty() : tensor<1x64xf32>
+    %179 = "ttir.relu"(%177, %178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %180 = tensor.empty() : tensor<1x64xf32>
+    %181 = "ttir.relu"(%179, %180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %182 = tensor.empty() : tensor<1x64xf32>
+    %183 = "ttir.relu"(%181, %182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %184 = tensor.empty() : tensor<1x64xf32>
+    %185 = "ttir.relu"(%183, %184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %186 = tensor.empty() : tensor<1x64xf32>
+    %187 = "ttir.relu"(%185, %186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %188 = tensor.empty() : tensor<1x64xf32>
+    %189 = "ttir.relu"(%187, %188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %190 = tensor.empty() : tensor<1x64xf32>
+    %191 = "ttir.relu"(%189, %190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %192 = tensor.empty() : tensor<1x64xf32>
+    %193 = "ttir.relu"(%191, %192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %194 = tensor.empty() : tensor<1x64xf32>
+    %195 = "ttir.relu"(%193, %194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %196 = tensor.empty() : tensor<1x64xf32>
+    %197 = "ttir.relu"(%195, %196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %198 = tensor.empty() : tensor<1x64xf32>
+    %199 = "ttir.relu"(%197, %198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %200 = tensor.empty() : tensor<1x64xf32>
+    %201 = "ttir.relu"(%199, %200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %202 = tensor.empty() : tensor<1x64xf32>
+    %203 = "ttir.relu"(%201, %202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %204 = tensor.empty() : tensor<1x64xf32>
+    %205 = "ttir.relu"(%203, %204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %206 = tensor.empty() : tensor<1x64xf32>
+    %207 = "ttir.relu"(%205, %206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %208 = tensor.empty() : tensor<1x64xf32>
+    %209 = "ttir.relu"(%207, %208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %210 = tensor.empty() : tensor<1x64xf32>
+    %211 = "ttir.relu"(%209, %210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %212 = tensor.empty() : tensor<1x64xf32>
+    %213 = "ttir.relu"(%211, %212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %214 = tensor.empty() : tensor<1x64xf32>
+    %215 = "ttir.relu"(%213, %214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %216 = tensor.empty() : tensor<1x64xf32>
+    %217 = "ttir.relu"(%215, %216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %218 = tensor.empty() : tensor<1x64xf32>
+    %219 = "ttir.relu"(%217, %218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %220 = tensor.empty() : tensor<1x64xf32>
+    %221 = "ttir.relu"(%219, %220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %222 = tensor.empty() : tensor<1x64xf32>
+    %223 = "ttir.relu"(%221, %222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %224 = tensor.empty() : tensor<1x64xf32>
+    %225 = "ttir.relu"(%223, %224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %226 = tensor.empty() : tensor<1x64xf32>
+    %227 = "ttir.relu"(%225, %226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %228 = tensor.empty() : tensor<1x64xf32>
+    %229 = "ttir.relu"(%227, %228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %230 = tensor.empty() : tensor<1x64xf32>
+    %231 = "ttir.relu"(%229, %230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %232 = tensor.empty() : tensor<1x64xf32>
+    %233 = "ttir.relu"(%231, %232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %234 = tensor.empty() : tensor<1x64xf32>
+    %235 = "ttir.relu"(%233, %234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %236 = tensor.empty() : tensor<1x64xf32>
+    %237 = "ttir.relu"(%235, %236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %238 = tensor.empty() : tensor<1x64xf32>
+    %239 = "ttir.relu"(%237, %238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %240 = tensor.empty() : tensor<1x64xf32>
+    %241 = "ttir.relu"(%239, %240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %242 = tensor.empty() : tensor<1x64xf32>
+    %243 = "ttir.relu"(%241, %242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %244 = tensor.empty() : tensor<1x64xf32>
+    %245 = "ttir.relu"(%243, %244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %246 = tensor.empty() : tensor<1x64xf32>
+    %247 = "ttir.relu"(%245, %246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %248 = tensor.empty() : tensor<1x64xf32>
+    %249 = "ttir.relu"(%247, %248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %250 = tensor.empty() : tensor<1x64xf32>
+    %251 = "ttir.relu"(%249, %250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %252 = tensor.empty() : tensor<1x64xf32>
+    %253 = "ttir.relu"(%251, %252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %254 = tensor.empty() : tensor<1x64xf32>
+    %255 = "ttir.relu"(%253, %254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %256 = tensor.empty() : tensor<1x64xf32>
+    %257 = "ttir.relu"(%255, %256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %258 = tensor.empty() : tensor<1x64xf32>
+    %259 = "ttir.relu"(%257, %258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %260 = tensor.empty() : tensor<1x64xf32>
+    %261 = "ttir.relu"(%259, %260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %262 = tensor.empty() : tensor<1x64xf32>
+    %263 = "ttir.relu"(%261, %262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %264 = tensor.empty() : tensor<1x64xf32>
+    %265 = "ttir.relu"(%263, %264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %266 = tensor.empty() : tensor<1x64xf32>
+    %267 = "ttir.relu"(%265, %266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %268 = tensor.empty() : tensor<1x64xf32>
+    %269 = "ttir.relu"(%267, %268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %270 = tensor.empty() : tensor<1x64xf32>
+    %271 = "ttir.relu"(%269, %270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %272 = tensor.empty() : tensor<1x64xf32>
+    %273 = "ttir.relu"(%271, %272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %274 = tensor.empty() : tensor<1x64xf32>
+    %275 = "ttir.relu"(%273, %274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %276 = tensor.empty() : tensor<1x64xf32>
+    %277 = "ttir.relu"(%275, %276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %278 = tensor.empty() : tensor<1x64xf32>
+    %279 = "ttir.relu"(%277, %278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %280 = tensor.empty() : tensor<1x64xf32>
+    %281 = "ttir.relu"(%279, %280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %282 = tensor.empty() : tensor<1x64xf32>
+    %283 = "ttir.relu"(%281, %282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %284 = tensor.empty() : tensor<1x64xf32>
+    %285 = "ttir.relu"(%283, %284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %286 = tensor.empty() : tensor<1x64xf32>
+    %287 = "ttir.relu"(%285, %286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %288 = tensor.empty() : tensor<1x64xf32>
+    %289 = "ttir.relu"(%287, %288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %290 = tensor.empty() : tensor<1x64xf32>
+    %291 = "ttir.relu"(%289, %290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %292 = tensor.empty() : tensor<1x64xf32>
+    %293 = "ttir.relu"(%291, %292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %294 = tensor.empty() : tensor<1x64xf32>
+    %295 = "ttir.relu"(%293, %294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %296 = tensor.empty() : tensor<1x64xf32>
+    %297 = "ttir.relu"(%295, %296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %298 = tensor.empty() : tensor<1x64xf32>
+    %299 = "ttir.relu"(%297, %298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %300 = tensor.empty() : tensor<1x64xf32>
+    %301 = "ttir.relu"(%299, %300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %302 = tensor.empty() : tensor<1x64xf32>
+    %303 = "ttir.relu"(%301, %302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %304 = tensor.empty() : tensor<1x64xf32>
+    %305 = "ttir.relu"(%303, %304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %306 = tensor.empty() : tensor<1x64xf32>
+    %307 = "ttir.relu"(%305, %306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %308 = tensor.empty() : tensor<1x64xf32>
+    %309 = "ttir.relu"(%307, %308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %310 = tensor.empty() : tensor<1x64xf32>
+    %311 = "ttir.relu"(%309, %310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %312 = tensor.empty() : tensor<1x64xf32>
+    %313 = "ttir.relu"(%311, %312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %314 = tensor.empty() : tensor<1x64xf32>
+    %315 = "ttir.relu"(%313, %314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %316 = tensor.empty() : tensor<1x64xf32>
+    %317 = "ttir.relu"(%315, %316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %318 = tensor.empty() : tensor<1x64xf32>
+    %319 = "ttir.relu"(%317, %318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %320 = tensor.empty() : tensor<1x64xf32>
+    %321 = "ttir.relu"(%319, %320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %322 = tensor.empty() : tensor<1x64xf32>
+    %323 = "ttir.relu"(%321, %322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %324 = tensor.empty() : tensor<1x64xf32>
+    %325 = "ttir.relu"(%323, %324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %326 = tensor.empty() : tensor<1x64xf32>
+    %327 = "ttir.relu"(%325, %326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %328 = tensor.empty() : tensor<1x64xf32>
+    %329 = "ttir.relu"(%327, %328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %330 = tensor.empty() : tensor<1x64xf32>
+    %331 = "ttir.relu"(%329, %330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %332 = tensor.empty() : tensor<1x64xf32>
+    %333 = "ttir.relu"(%331, %332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %334 = tensor.empty() : tensor<1x64xf32>
+    %335 = "ttir.relu"(%333, %334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %336 = tensor.empty() : tensor<1x64xf32>
+    %337 = "ttir.relu"(%335, %336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %338 = tensor.empty() : tensor<1x64xf32>
+    %339 = "ttir.relu"(%337, %338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %340 = tensor.empty() : tensor<1x64xf32>
+    %341 = "ttir.relu"(%339, %340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %342 = tensor.empty() : tensor<1x64xf32>
+    %343 = "ttir.relu"(%341, %342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %344 = tensor.empty() : tensor<1x64xf32>
+    %345 = "ttir.relu"(%343, %344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %346 = tensor.empty() : tensor<1x64xf32>
+    %347 = "ttir.relu"(%345, %346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %348 = tensor.empty() : tensor<1x64xf32>
+    %349 = "ttir.relu"(%347, %348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %350 = tensor.empty() : tensor<1x64xf32>
+    %351 = "ttir.relu"(%349, %350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %352 = tensor.empty() : tensor<1x64xf32>
+    %353 = "ttir.relu"(%351, %352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %354 = tensor.empty() : tensor<1x64xf32>
+    %355 = "ttir.relu"(%353, %354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %356 = tensor.empty() : tensor<1x64xf32>
+    %357 = "ttir.relu"(%355, %356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %358 = tensor.empty() : tensor<1x64xf32>
+    %359 = "ttir.relu"(%357, %358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %360 = tensor.empty() : tensor<1x64xf32>
+    %361 = "ttir.relu"(%359, %360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %362 = tensor.empty() : tensor<1x64xf32>
+    %363 = "ttir.relu"(%361, %362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %364 = tensor.empty() : tensor<1x64xf32>
+    %365 = "ttir.relu"(%363, %364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %366 = tensor.empty() : tensor<1x64xf32>
+    %367 = "ttir.relu"(%365, %366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %368 = tensor.empty() : tensor<1x64xf32>
+    %369 = "ttir.relu"(%367, %368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %370 = tensor.empty() : tensor<1x64xf32>
+    %371 = "ttir.relu"(%369, %370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %372 = tensor.empty() : tensor<1x64xf32>
+    %373 = "ttir.relu"(%371, %372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %374 = tensor.empty() : tensor<1x64xf32>
+    %375 = "ttir.relu"(%373, %374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %376 = tensor.empty() : tensor<1x64xf32>
+    %377 = "ttir.relu"(%375, %376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %378 = tensor.empty() : tensor<1x64xf32>
+    %379 = "ttir.relu"(%377, %378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %380 = tensor.empty() : tensor<1x64xf32>
+    %381 = "ttir.relu"(%379, %380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %382 = tensor.empty() : tensor<1x64xf32>
+    %383 = "ttir.relu"(%381, %382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %384 = tensor.empty() : tensor<1x64xf32>
+    %385 = "ttir.relu"(%383, %384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %386 = tensor.empty() : tensor<1x64xf32>
+    %387 = "ttir.relu"(%385, %386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %388 = tensor.empty() : tensor<1x64xf32>
+    %389 = "ttir.relu"(%387, %388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %390 = tensor.empty() : tensor<1x64xf32>
+    %391 = "ttir.relu"(%389, %390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %392 = tensor.empty() : tensor<1x64xf32>
+    %393 = "ttir.relu"(%391, %392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %394 = tensor.empty() : tensor<1x64xf32>
+    %395 = "ttir.relu"(%393, %394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %396 = tensor.empty() : tensor<1x64xf32>
+    %397 = "ttir.relu"(%395, %396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %398 = tensor.empty() : tensor<1x64xf32>
+    %399 = "ttir.relu"(%397, %398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %400 = tensor.empty() : tensor<1x64xf32>
+    %401 = "ttir.relu"(%399, %400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %402 = tensor.empty() : tensor<1x64xf32>
+    %403 = "ttir.relu"(%401, %402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %404 = tensor.empty() : tensor<1x64xf32>
+    %405 = "ttir.relu"(%403, %404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %406 = tensor.empty() : tensor<1x64xf32>
+    %407 = "ttir.relu"(%405, %406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %408 = tensor.empty() : tensor<1x64xf32>
+    %409 = "ttir.relu"(%407, %408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %410 = tensor.empty() : tensor<1x64xf32>
+    %411 = "ttir.relu"(%409, %410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %412 = tensor.empty() : tensor<1x64xf32>
+    %413 = "ttir.relu"(%411, %412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %414 = tensor.empty() : tensor<1x64xf32>
+    %415 = "ttir.relu"(%413, %414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %416 = tensor.empty() : tensor<1x64xf32>
+    %417 = "ttir.relu"(%415, %416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %418 = tensor.empty() : tensor<1x64xf32>
+    %419 = "ttir.relu"(%417, %418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %420 = tensor.empty() : tensor<1x64xf32>
+    %421 = "ttir.relu"(%419, %420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %422 = tensor.empty() : tensor<1x64xf32>
+    %423 = "ttir.relu"(%421, %422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %424 = tensor.empty() : tensor<1x64xf32>
+    %425 = "ttir.relu"(%423, %424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %426 = tensor.empty() : tensor<1x64xf32>
+    %427 = "ttir.relu"(%425, %426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %428 = tensor.empty() : tensor<1x64xf32>
+    %429 = "ttir.relu"(%427, %428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %430 = tensor.empty() : tensor<1x64xf32>
+    %431 = "ttir.relu"(%429, %430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %432 = tensor.empty() : tensor<1x64xf32>
+    %433 = "ttir.relu"(%431, %432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %434 = tensor.empty() : tensor<1x64xf32>
+    %435 = "ttir.relu"(%433, %434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %436 = tensor.empty() : tensor<1x64xf32>
+    %437 = "ttir.relu"(%435, %436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %438 = tensor.empty() : tensor<1x64xf32>
+    %439 = "ttir.relu"(%437, %438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %440 = tensor.empty() : tensor<1x64xf32>
+    %441 = "ttir.relu"(%439, %440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %442 = tensor.empty() : tensor<1x64xf32>
+    %443 = "ttir.relu"(%441, %442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %444 = tensor.empty() : tensor<1x64xf32>
+    %445 = "ttir.relu"(%443, %444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %446 = tensor.empty() : tensor<1x64xf32>
+    %447 = "ttir.relu"(%445, %446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %448 = tensor.empty() : tensor<1x64xf32>
+    %449 = "ttir.relu"(%447, %448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %450 = tensor.empty() : tensor<1x64xf32>
+    %451 = "ttir.relu"(%449, %450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %452 = tensor.empty() : tensor<1x64xf32>
+    %453 = "ttir.relu"(%451, %452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %454 = tensor.empty() : tensor<1x64xf32>
+    %455 = "ttir.relu"(%453, %454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %456 = tensor.empty() : tensor<1x64xf32>
+    %457 = "ttir.relu"(%455, %456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %458 = tensor.empty() : tensor<1x64xf32>
+    %459 = "ttir.relu"(%457, %458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %460 = tensor.empty() : tensor<1x64xf32>
+    %461 = "ttir.relu"(%459, %460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %462 = tensor.empty() : tensor<1x64xf32>
+    %463 = "ttir.relu"(%461, %462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %464 = tensor.empty() : tensor<1x64xf32>
+    %465 = "ttir.relu"(%463, %464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %466 = tensor.empty() : tensor<1x64xf32>
+    %467 = "ttir.relu"(%465, %466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %468 = tensor.empty() : tensor<1x64xf32>
+    %469 = "ttir.relu"(%467, %468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %470 = tensor.empty() : tensor<1x64xf32>
+    %471 = "ttir.relu"(%469, %470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %472 = tensor.empty() : tensor<1x64xf32>
+    %473 = "ttir.relu"(%471, %472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %474 = tensor.empty() : tensor<1x64xf32>
+    %475 = "ttir.relu"(%473, %474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %476 = tensor.empty() : tensor<1x64xf32>
+    %477 = "ttir.relu"(%475, %476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %478 = tensor.empty() : tensor<1x64xf32>
+    %479 = "ttir.relu"(%477, %478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %480 = tensor.empty() : tensor<1x64xf32>
+    %481 = "ttir.relu"(%479, %480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %482 = tensor.empty() : tensor<1x64xf32>
+    %483 = "ttir.relu"(%481, %482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %484 = tensor.empty() : tensor<1x64xf32>
+    %485 = "ttir.relu"(%483, %484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %486 = tensor.empty() : tensor<1x64xf32>
+    %487 = "ttir.relu"(%485, %486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %488 = tensor.empty() : tensor<1x64xf32>
+    %489 = "ttir.relu"(%487, %488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %490 = tensor.empty() : tensor<1x64xf32>
+    %491 = "ttir.relu"(%489, %490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %492 = tensor.empty() : tensor<1x64xf32>
+    %493 = "ttir.relu"(%491, %492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %494 = tensor.empty() : tensor<1x64xf32>
+    %495 = "ttir.relu"(%493, %494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %496 = tensor.empty() : tensor<1x64xf32>
+    %497 = "ttir.relu"(%495, %496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %498 = tensor.empty() : tensor<1x64xf32>
+    %499 = "ttir.relu"(%497, %498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %500 = tensor.empty() : tensor<1x64xf32>
+    %501 = "ttir.relu"(%499, %500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %502 = tensor.empty() : tensor<1x64xf32>
+    %503 = "ttir.relu"(%501, %502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %504 = tensor.empty() : tensor<1x64xf32>
+    %505 = "ttir.relu"(%503, %504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %506 = tensor.empty() : tensor<1x64xf32>
+    %507 = "ttir.relu"(%505, %506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %508 = tensor.empty() : tensor<1x64xf32>
+    %509 = "ttir.relu"(%507, %508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %510 = tensor.empty() : tensor<1x64xf32>
+    %511 = "ttir.relu"(%509, %510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %512 = tensor.empty() : tensor<1x64xf32>
+    %513 = "ttir.relu"(%511, %512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %514 = tensor.empty() : tensor<1x64xf32>
+    %515 = "ttir.relu"(%513, %514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %516 = tensor.empty() : tensor<1x64xf32>
+    %517 = "ttir.relu"(%515, %516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %518 = tensor.empty() : tensor<1x64xf32>
+    %519 = "ttir.relu"(%517, %518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %520 = tensor.empty() : tensor<1x64xf32>
+    %521 = "ttir.relu"(%519, %520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %522 = tensor.empty() : tensor<1x64xf32>
+    %523 = "ttir.relu"(%521, %522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %524 = tensor.empty() : tensor<1x64xf32>
+    %525 = "ttir.relu"(%523, %524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %526 = tensor.empty() : tensor<1x64xf32>
+    %527 = "ttir.relu"(%525, %526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %528 = tensor.empty() : tensor<1x64xf32>
+    %529 = "ttir.relu"(%527, %528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %530 = tensor.empty() : tensor<1x64xf32>
+    %531 = "ttir.relu"(%529, %530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %532 = tensor.empty() : tensor<1x64xf32>
+    %533 = "ttir.relu"(%531, %532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %534 = tensor.empty() : tensor<1x64xf32>
+    %535 = "ttir.relu"(%533, %534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %536 = tensor.empty() : tensor<1x64xf32>
+    %537 = "ttir.relu"(%535, %536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %538 = tensor.empty() : tensor<1x64xf32>
+    %539 = "ttir.relu"(%537, %538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %540 = tensor.empty() : tensor<1x64xf32>
+    %541 = "ttir.relu"(%539, %540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %542 = tensor.empty() : tensor<1x64xf32>
+    %543 = "ttir.relu"(%541, %542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %544 = tensor.empty() : tensor<1x64xf32>
+    %545 = "ttir.relu"(%543, %544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %546 = tensor.empty() : tensor<1x64xf32>
+    %547 = "ttir.relu"(%545, %546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %548 = tensor.empty() : tensor<1x64xf32>
+    %549 = "ttir.relu"(%547, %548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %550 = tensor.empty() : tensor<1x64xf32>
+    %551 = "ttir.relu"(%549, %550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %552 = tensor.empty() : tensor<1x64xf32>
+    %553 = "ttir.relu"(%551, %552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %554 = tensor.empty() : tensor<1x64xf32>
+    %555 = "ttir.relu"(%553, %554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %556 = tensor.empty() : tensor<1x64xf32>
+    %557 = "ttir.relu"(%555, %556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %558 = tensor.empty() : tensor<1x64xf32>
+    %559 = "ttir.relu"(%557, %558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %560 = tensor.empty() : tensor<1x64xf32>
+    %561 = "ttir.relu"(%559, %560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %562 = tensor.empty() : tensor<1x64xf32>
+    %563 = "ttir.relu"(%561, %562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %564 = tensor.empty() : tensor<1x64xf32>
+    %565 = "ttir.relu"(%563, %564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %566 = tensor.empty() : tensor<1x64xf32>
+    %567 = "ttir.relu"(%565, %566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %568 = tensor.empty() : tensor<1x64xf32>
+    %569 = "ttir.relu"(%567, %568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %570 = tensor.empty() : tensor<1x64xf32>
+    %571 = "ttir.relu"(%569, %570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %572 = tensor.empty() : tensor<1x64xf32>
+    %573 = "ttir.relu"(%571, %572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %574 = tensor.empty() : tensor<1x64xf32>
+    %575 = "ttir.relu"(%573, %574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %576 = tensor.empty() : tensor<1x64xf32>
+    %577 = "ttir.relu"(%575, %576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %578 = tensor.empty() : tensor<1x64xf32>
+    %579 = "ttir.relu"(%577, %578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %580 = tensor.empty() : tensor<1x64xf32>
+    %581 = "ttir.relu"(%579, %580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %582 = tensor.empty() : tensor<1x64xf32>
+    %583 = "ttir.relu"(%581, %582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %584 = tensor.empty() : tensor<1x64xf32>
+    %585 = "ttir.relu"(%583, %584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %586 = tensor.empty() : tensor<1x64xf32>
+    %587 = "ttir.relu"(%585, %586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %588 = tensor.empty() : tensor<1x64xf32>
+    %589 = "ttir.relu"(%587, %588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %590 = tensor.empty() : tensor<1x64xf32>
+    %591 = "ttir.relu"(%589, %590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %592 = tensor.empty() : tensor<1x64xf32>
+    %593 = "ttir.relu"(%591, %592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %594 = tensor.empty() : tensor<1x64xf32>
+    %595 = "ttir.relu"(%593, %594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %596 = tensor.empty() : tensor<1x64xf32>
+    %597 = "ttir.relu"(%595, %596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %598 = tensor.empty() : tensor<1x64xf32>
+    %599 = "ttir.relu"(%597, %598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %600 = tensor.empty() : tensor<1x64xf32>
+    %601 = "ttir.relu"(%599, %600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %602 = tensor.empty() : tensor<1x64xf32>
+    %603 = "ttir.relu"(%601, %602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %604 = tensor.empty() : tensor<1x64xf32>
+    %605 = "ttir.relu"(%603, %604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %606 = tensor.empty() : tensor<1x64xf32>
+    %607 = "ttir.relu"(%605, %606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %608 = tensor.empty() : tensor<1x64xf32>
+    %609 = "ttir.relu"(%607, %608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %610 = tensor.empty() : tensor<1x64xf32>
+    %611 = "ttir.relu"(%609, %610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %612 = tensor.empty() : tensor<1x64xf32>
+    %613 = "ttir.relu"(%611, %612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %614 = tensor.empty() : tensor<1x64xf32>
+    %615 = "ttir.relu"(%613, %614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %616 = tensor.empty() : tensor<1x64xf32>
+    %617 = "ttir.relu"(%615, %616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %618 = tensor.empty() : tensor<1x64xf32>
+    %619 = "ttir.relu"(%617, %618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %620 = tensor.empty() : tensor<1x64xf32>
+    %621 = "ttir.relu"(%619, %620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %622 = tensor.empty() : tensor<1x64xf32>
+    %623 = "ttir.relu"(%621, %622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %624 = tensor.empty() : tensor<1x64xf32>
+    %625 = "ttir.relu"(%623, %624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %626 = tensor.empty() : tensor<1x64xf32>
+    %627 = "ttir.relu"(%625, %626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %628 = tensor.empty() : tensor<1x64xf32>
+    %629 = "ttir.relu"(%627, %628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %630 = tensor.empty() : tensor<1x64xf32>
+    %631 = "ttir.relu"(%629, %630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %632 = tensor.empty() : tensor<1x64xf32>
+    %633 = "ttir.relu"(%631, %632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %634 = tensor.empty() : tensor<1x64xf32>
+    %635 = "ttir.relu"(%633, %634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %636 = tensor.empty() : tensor<1x64xf32>
+    %637 = "ttir.relu"(%635, %636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %638 = tensor.empty() : tensor<1x64xf32>
+    %639 = "ttir.relu"(%637, %638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %640 = tensor.empty() : tensor<1x64xf32>
+    %641 = "ttir.relu"(%639, %640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %642 = tensor.empty() : tensor<1x64xf32>
+    %643 = "ttir.relu"(%641, %642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %644 = tensor.empty() : tensor<1x64xf32>
+    %645 = "ttir.relu"(%643, %644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %646 = tensor.empty() : tensor<1x64xf32>
+    %647 = "ttir.relu"(%645, %646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %648 = tensor.empty() : tensor<1x64xf32>
+    %649 = "ttir.relu"(%647, %648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %650 = tensor.empty() : tensor<1x64xf32>
+    %651 = "ttir.relu"(%649, %650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %652 = tensor.empty() : tensor<1x64xf32>
+    %653 = "ttir.relu"(%651, %652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %654 = tensor.empty() : tensor<1x64xf32>
+    %655 = "ttir.relu"(%653, %654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %656 = tensor.empty() : tensor<1x64xf32>
+    %657 = "ttir.relu"(%655, %656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %658 = tensor.empty() : tensor<1x64xf32>
+    %659 = "ttir.relu"(%657, %658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %660 = tensor.empty() : tensor<1x64xf32>
+    %661 = "ttir.relu"(%659, %660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %662 = tensor.empty() : tensor<1x64xf32>
+    %663 = "ttir.relu"(%661, %662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %664 = tensor.empty() : tensor<1x64xf32>
+    %665 = "ttir.relu"(%663, %664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %666 = tensor.empty() : tensor<1x64xf32>
+    %667 = "ttir.relu"(%665, %666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %668 = tensor.empty() : tensor<1x64xf32>
+    %669 = "ttir.relu"(%667, %668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %670 = tensor.empty() : tensor<1x64xf32>
+    %671 = "ttir.relu"(%669, %670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %672 = tensor.empty() : tensor<1x64xf32>
+    %673 = "ttir.relu"(%671, %672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %674 = tensor.empty() : tensor<1x64xf32>
+    %675 = "ttir.relu"(%673, %674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %676 = tensor.empty() : tensor<1x64xf32>
+    %677 = "ttir.relu"(%675, %676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %678 = tensor.empty() : tensor<1x64xf32>
+    %679 = "ttir.relu"(%677, %678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %680 = tensor.empty() : tensor<1x64xf32>
+    %681 = "ttir.relu"(%679, %680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %682 = tensor.empty() : tensor<1x64xf32>
+    %683 = "ttir.relu"(%681, %682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %684 = tensor.empty() : tensor<1x64xf32>
+    %685 = "ttir.relu"(%683, %684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %686 = tensor.empty() : tensor<1x64xf32>
+    %687 = "ttir.relu"(%685, %686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %688 = tensor.empty() : tensor<1x64xf32>
+    %689 = "ttir.relu"(%687, %688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %690 = tensor.empty() : tensor<1x64xf32>
+    %691 = "ttir.relu"(%689, %690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %692 = tensor.empty() : tensor<1x64xf32>
+    %693 = "ttir.relu"(%691, %692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %694 = tensor.empty() : tensor<1x64xf32>
+    %695 = "ttir.relu"(%693, %694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %696 = tensor.empty() : tensor<1x64xf32>
+    %697 = "ttir.relu"(%695, %696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %698 = tensor.empty() : tensor<1x64xf32>
+    %699 = "ttir.relu"(%697, %698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %700 = tensor.empty() : tensor<1x64xf32>
+    %701 = "ttir.relu"(%699, %700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %702 = tensor.empty() : tensor<1x64xf32>
+    %703 = "ttir.relu"(%701, %702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %704 = tensor.empty() : tensor<1x64xf32>
+    %705 = "ttir.relu"(%703, %704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %706 = tensor.empty() : tensor<1x64xf32>
+    %707 = "ttir.relu"(%705, %706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %708 = tensor.empty() : tensor<1x64xf32>
+    %709 = "ttir.relu"(%707, %708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %710 = tensor.empty() : tensor<1x64xf32>
+    %711 = "ttir.relu"(%709, %710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %712 = tensor.empty() : tensor<1x64xf32>
+    %713 = "ttir.relu"(%711, %712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %714 = tensor.empty() : tensor<1x64xf32>
+    %715 = "ttir.relu"(%713, %714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %716 = tensor.empty() : tensor<1x64xf32>
+    %717 = "ttir.relu"(%715, %716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %718 = tensor.empty() : tensor<1x64xf32>
+    %719 = "ttir.relu"(%717, %718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %720 = tensor.empty() : tensor<1x64xf32>
+    %721 = "ttir.relu"(%719, %720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %722 = tensor.empty() : tensor<1x64xf32>
+    %723 = "ttir.relu"(%721, %722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %724 = tensor.empty() : tensor<1x64xf32>
+    %725 = "ttir.relu"(%723, %724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %726 = tensor.empty() : tensor<1x64xf32>
+    %727 = "ttir.relu"(%725, %726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %728 = tensor.empty() : tensor<1x64xf32>
+    %729 = "ttir.relu"(%727, %728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %730 = tensor.empty() : tensor<1x64xf32>
+    %731 = "ttir.relu"(%729, %730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %732 = tensor.empty() : tensor<1x64xf32>
+    %733 = "ttir.relu"(%731, %732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %734 = tensor.empty() : tensor<1x64xf32>
+    %735 = "ttir.relu"(%733, %734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %736 = tensor.empty() : tensor<1x64xf32>
+    %737 = "ttir.relu"(%735, %736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %738 = tensor.empty() : tensor<1x64xf32>
+    %739 = "ttir.relu"(%737, %738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %740 = tensor.empty() : tensor<1x64xf32>
+    %741 = "ttir.relu"(%739, %740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %742 = tensor.empty() : tensor<1x64xf32>
+    %743 = "ttir.relu"(%741, %742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %744 = tensor.empty() : tensor<1x64xf32>
+    %745 = "ttir.relu"(%743, %744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %746 = tensor.empty() : tensor<1x64xf32>
+    %747 = "ttir.relu"(%745, %746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %748 = tensor.empty() : tensor<1x64xf32>
+    %749 = "ttir.relu"(%747, %748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %750 = tensor.empty() : tensor<1x64xf32>
+    %751 = "ttir.relu"(%749, %750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %752 = tensor.empty() : tensor<1x64xf32>
+    %753 = "ttir.relu"(%751, %752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %754 = tensor.empty() : tensor<1x64xf32>
+    %755 = "ttir.relu"(%753, %754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %756 = tensor.empty() : tensor<1x64xf32>
+    %757 = "ttir.relu"(%755, %756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %758 = tensor.empty() : tensor<1x64xf32>
+    %759 = "ttir.relu"(%757, %758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %760 = tensor.empty() : tensor<1x64xf32>
+    %761 = "ttir.relu"(%759, %760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %762 = tensor.empty() : tensor<1x64xf32>
+    %763 = "ttir.relu"(%761, %762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %764 = tensor.empty() : tensor<1x64xf32>
+    %765 = "ttir.relu"(%763, %764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %766 = tensor.empty() : tensor<1x64xf32>
+    %767 = "ttir.relu"(%765, %766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %768 = tensor.empty() : tensor<1x64xf32>
+    %769 = "ttir.relu"(%767, %768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %770 = tensor.empty() : tensor<1x64xf32>
+    %771 = "ttir.relu"(%769, %770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %772 = tensor.empty() : tensor<1x64xf32>
+    %773 = "ttir.relu"(%771, %772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %774 = tensor.empty() : tensor<1x64xf32>
+    %775 = "ttir.relu"(%773, %774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %776 = tensor.empty() : tensor<1x64xf32>
+    %777 = "ttir.relu"(%775, %776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %778 = tensor.empty() : tensor<1x64xf32>
+    %779 = "ttir.relu"(%777, %778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %780 = tensor.empty() : tensor<1x64xf32>
+    %781 = "ttir.relu"(%779, %780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %782 = tensor.empty() : tensor<1x64xf32>
+    %783 = "ttir.relu"(%781, %782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %784 = tensor.empty() : tensor<1x64xf32>
+    %785 = "ttir.relu"(%783, %784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %786 = tensor.empty() : tensor<1x64xf32>
+    %787 = "ttir.relu"(%785, %786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %788 = tensor.empty() : tensor<1x64xf32>
+    %789 = "ttir.relu"(%787, %788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %790 = tensor.empty() : tensor<1x64xf32>
+    %791 = "ttir.relu"(%789, %790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %792 = tensor.empty() : tensor<1x64xf32>
+    %793 = "ttir.relu"(%791, %792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %794 = tensor.empty() : tensor<1x64xf32>
+    %795 = "ttir.relu"(%793, %794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %796 = tensor.empty() : tensor<1x64xf32>
+    %797 = "ttir.relu"(%795, %796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %798 = tensor.empty() : tensor<1x64xf32>
+    %799 = "ttir.relu"(%797, %798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %800 = tensor.empty() : tensor<1x64xf32>
+    %801 = "ttir.relu"(%799, %800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %802 = tensor.empty() : tensor<1x64xf32>
+    %803 = "ttir.relu"(%801, %802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %804 = tensor.empty() : tensor<1x64xf32>
+    %805 = "ttir.relu"(%803, %804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %806 = tensor.empty() : tensor<1x64xf32>
+    %807 = "ttir.relu"(%805, %806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %808 = tensor.empty() : tensor<1x64xf32>
+    %809 = "ttir.relu"(%807, %808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %810 = tensor.empty() : tensor<1x64xf32>
+    %811 = "ttir.relu"(%809, %810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %812 = tensor.empty() : tensor<1x64xf32>
+    %813 = "ttir.relu"(%811, %812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %814 = tensor.empty() : tensor<1x64xf32>
+    %815 = "ttir.relu"(%813, %814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %816 = tensor.empty() : tensor<1x64xf32>
+    %817 = "ttir.relu"(%815, %816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %818 = tensor.empty() : tensor<1x64xf32>
+    %819 = "ttir.relu"(%817, %818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %820 = tensor.empty() : tensor<1x64xf32>
+    %821 = "ttir.relu"(%819, %820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %822 = tensor.empty() : tensor<1x64xf32>
+    %823 = "ttir.relu"(%821, %822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %824 = tensor.empty() : tensor<1x64xf32>
+    %825 = "ttir.relu"(%823, %824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %826 = tensor.empty() : tensor<1x64xf32>
+    %827 = "ttir.relu"(%825, %826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %828 = tensor.empty() : tensor<1x64xf32>
+    %829 = "ttir.relu"(%827, %828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %830 = tensor.empty() : tensor<1x64xf32>
+    %831 = "ttir.relu"(%829, %830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %832 = tensor.empty() : tensor<1x64xf32>
+    %833 = "ttir.relu"(%831, %832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %834 = tensor.empty() : tensor<1x64xf32>
+    %835 = "ttir.relu"(%833, %834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %836 = tensor.empty() : tensor<1x64xf32>
+    %837 = "ttir.relu"(%835, %836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %838 = tensor.empty() : tensor<1x64xf32>
+    %839 = "ttir.relu"(%837, %838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %840 = tensor.empty() : tensor<1x64xf32>
+    %841 = "ttir.relu"(%839, %840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %842 = tensor.empty() : tensor<1x64xf32>
+    %843 = "ttir.relu"(%841, %842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %844 = tensor.empty() : tensor<1x64xf32>
+    %845 = "ttir.relu"(%843, %844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %846 = tensor.empty() : tensor<1x64xf32>
+    %847 = "ttir.relu"(%845, %846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %848 = tensor.empty() : tensor<1x64xf32>
+    %849 = "ttir.relu"(%847, %848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %850 = tensor.empty() : tensor<1x64xf32>
+    %851 = "ttir.relu"(%849, %850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %852 = tensor.empty() : tensor<1x64xf32>
+    %853 = "ttir.relu"(%851, %852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %854 = tensor.empty() : tensor<1x64xf32>
+    %855 = "ttir.relu"(%853, %854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %856 = tensor.empty() : tensor<1x64xf32>
+    %857 = "ttir.relu"(%855, %856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %858 = tensor.empty() : tensor<1x64xf32>
+    %859 = "ttir.relu"(%857, %858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %860 = tensor.empty() : tensor<1x64xf32>
+    %861 = "ttir.relu"(%859, %860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %862 = tensor.empty() : tensor<1x64xf32>
+    %863 = "ttir.relu"(%861, %862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %864 = tensor.empty() : tensor<1x64xf32>
+    %865 = "ttir.relu"(%863, %864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %866 = tensor.empty() : tensor<1x64xf32>
+    %867 = "ttir.relu"(%865, %866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %868 = tensor.empty() : tensor<1x64xf32>
+    %869 = "ttir.relu"(%867, %868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %870 = tensor.empty() : tensor<1x64xf32>
+    %871 = "ttir.relu"(%869, %870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %872 = tensor.empty() : tensor<1x64xf32>
+    %873 = "ttir.relu"(%871, %872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %874 = tensor.empty() : tensor<1x64xf32>
+    %875 = "ttir.relu"(%873, %874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %876 = tensor.empty() : tensor<1x64xf32>
+    %877 = "ttir.relu"(%875, %876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %878 = tensor.empty() : tensor<1x64xf32>
+    %879 = "ttir.relu"(%877, %878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %880 = tensor.empty() : tensor<1x64xf32>
+    %881 = "ttir.relu"(%879, %880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %882 = tensor.empty() : tensor<1x64xf32>
+    %883 = "ttir.relu"(%881, %882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %884 = tensor.empty() : tensor<1x64xf32>
+    %885 = "ttir.relu"(%883, %884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %886 = tensor.empty() : tensor<1x64xf32>
+    %887 = "ttir.relu"(%885, %886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %888 = tensor.empty() : tensor<1x64xf32>
+    %889 = "ttir.relu"(%887, %888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %890 = tensor.empty() : tensor<1x64xf32>
+    %891 = "ttir.relu"(%889, %890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %892 = tensor.empty() : tensor<1x64xf32>
+    %893 = "ttir.relu"(%891, %892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %894 = tensor.empty() : tensor<1x64xf32>
+    %895 = "ttir.relu"(%893, %894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %896 = tensor.empty() : tensor<1x64xf32>
+    %897 = "ttir.relu"(%895, %896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %898 = tensor.empty() : tensor<1x64xf32>
+    %899 = "ttir.relu"(%897, %898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %900 = tensor.empty() : tensor<1x64xf32>
+    %901 = "ttir.relu"(%899, %900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %902 = tensor.empty() : tensor<1x64xf32>
+    %903 = "ttir.relu"(%901, %902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %904 = tensor.empty() : tensor<1x64xf32>
+    %905 = "ttir.relu"(%903, %904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %906 = tensor.empty() : tensor<1x64xf32>
+    %907 = "ttir.relu"(%905, %906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %908 = tensor.empty() : tensor<1x64xf32>
+    %909 = "ttir.relu"(%907, %908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %910 = tensor.empty() : tensor<1x64xf32>
+    %911 = "ttir.relu"(%909, %910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %912 = tensor.empty() : tensor<1x64xf32>
+    %913 = "ttir.relu"(%911, %912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %914 = tensor.empty() : tensor<1x64xf32>
+    %915 = "ttir.relu"(%913, %914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %916 = tensor.empty() : tensor<1x64xf32>
+    %917 = "ttir.relu"(%915, %916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %918 = tensor.empty() : tensor<1x64xf32>
+    %919 = "ttir.relu"(%917, %918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %920 = tensor.empty() : tensor<1x64xf32>
+    %921 = "ttir.relu"(%919, %920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %922 = tensor.empty() : tensor<1x64xf32>
+    %923 = "ttir.relu"(%921, %922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %924 = tensor.empty() : tensor<1x64xf32>
+    %925 = "ttir.relu"(%923, %924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %926 = tensor.empty() : tensor<1x64xf32>
+    %927 = "ttir.relu"(%925, %926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %928 = tensor.empty() : tensor<1x64xf32>
+    %929 = "ttir.relu"(%927, %928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %930 = tensor.empty() : tensor<1x64xf32>
+    %931 = "ttir.relu"(%929, %930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %932 = tensor.empty() : tensor<1x64xf32>
+    %933 = "ttir.relu"(%931, %932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %934 = tensor.empty() : tensor<1x64xf32>
+    %935 = "ttir.relu"(%933, %934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %936 = tensor.empty() : tensor<1x64xf32>
+    %937 = "ttir.relu"(%935, %936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %938 = tensor.empty() : tensor<1x64xf32>
+    %939 = "ttir.relu"(%937, %938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %940 = tensor.empty() : tensor<1x64xf32>
+    %941 = "ttir.relu"(%939, %940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %942 = tensor.empty() : tensor<1x64xf32>
+    %943 = "ttir.relu"(%941, %942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %944 = tensor.empty() : tensor<1x64xf32>
+    %945 = "ttir.relu"(%943, %944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %946 = tensor.empty() : tensor<1x64xf32>
+    %947 = "ttir.relu"(%945, %946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %948 = tensor.empty() : tensor<1x64xf32>
+    %949 = "ttir.relu"(%947, %948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %950 = tensor.empty() : tensor<1x64xf32>
+    %951 = "ttir.relu"(%949, %950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %952 = tensor.empty() : tensor<1x64xf32>
+    %953 = "ttir.relu"(%951, %952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %954 = tensor.empty() : tensor<1x64xf32>
+    %955 = "ttir.relu"(%953, %954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %956 = tensor.empty() : tensor<1x64xf32>
+    %957 = "ttir.relu"(%955, %956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %958 = tensor.empty() : tensor<1x64xf32>
+    %959 = "ttir.relu"(%957, %958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %960 = tensor.empty() : tensor<1x64xf32>
+    %961 = "ttir.relu"(%959, %960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %962 = tensor.empty() : tensor<1x64xf32>
+    %963 = "ttir.relu"(%961, %962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %964 = tensor.empty() : tensor<1x64xf32>
+    %965 = "ttir.relu"(%963, %964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %966 = tensor.empty() : tensor<1x64xf32>
+    %967 = "ttir.relu"(%965, %966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %968 = tensor.empty() : tensor<1x64xf32>
+    %969 = "ttir.relu"(%967, %968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %970 = tensor.empty() : tensor<1x64xf32>
+    %971 = "ttir.relu"(%969, %970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %972 = tensor.empty() : tensor<1x64xf32>
+    %973 = "ttir.relu"(%971, %972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %974 = tensor.empty() : tensor<1x64xf32>
+    %975 = "ttir.relu"(%973, %974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %976 = tensor.empty() : tensor<1x64xf32>
+    %977 = "ttir.relu"(%975, %976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %978 = tensor.empty() : tensor<1x64xf32>
+    %979 = "ttir.relu"(%977, %978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %980 = tensor.empty() : tensor<1x64xf32>
+    %981 = "ttir.relu"(%979, %980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %982 = tensor.empty() : tensor<1x64xf32>
+    %983 = "ttir.relu"(%981, %982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %984 = tensor.empty() : tensor<1x64xf32>
+    %985 = "ttir.relu"(%983, %984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %986 = tensor.empty() : tensor<1x64xf32>
+    %987 = "ttir.relu"(%985, %986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %988 = tensor.empty() : tensor<1x64xf32>
+    %989 = "ttir.relu"(%987, %988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %990 = tensor.empty() : tensor<1x64xf32>
+    %991 = "ttir.relu"(%989, %990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %992 = tensor.empty() : tensor<1x64xf32>
+    %993 = "ttir.relu"(%991, %992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %994 = tensor.empty() : tensor<1x64xf32>
+    %995 = "ttir.relu"(%993, %994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %996 = tensor.empty() : tensor<1x64xf32>
+    %997 = "ttir.relu"(%995, %996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %998 = tensor.empty() : tensor<1x64xf32>
+    %999 = "ttir.relu"(%997, %998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1000 = tensor.empty() : tensor<1x64xf32>
+    %1001 = "ttir.relu"(%999, %1000) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1002 = tensor.empty() : tensor<1x64xf32>
+    %1003 = "ttir.relu"(%1001, %1002) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1004 = tensor.empty() : tensor<1x64xf32>
+    %1005 = "ttir.relu"(%1003, %1004) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1006 = tensor.empty() : tensor<1x64xf32>
+    %1007 = "ttir.relu"(%1005, %1006) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1008 = tensor.empty() : tensor<1x64xf32>
+    %1009 = "ttir.relu"(%1007, %1008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1010 = tensor.empty() : tensor<1x64xf32>
+    %1011 = "ttir.relu"(%1009, %1010) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1012 = tensor.empty() : tensor<1x64xf32>
+    %1013 = "ttir.relu"(%1011, %1012) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1014 = tensor.empty() : tensor<1x64xf32>
+    %1015 = "ttir.relu"(%1013, %1014) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1016 = tensor.empty() : tensor<1x64xf32>
+    %1017 = "ttir.relu"(%1015, %1016) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1018 = tensor.empty() : tensor<1x64xf32>
+    %1019 = "ttir.relu"(%1017, %1018) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1020 = tensor.empty() : tensor<1x64xf32>
+    %1021 = "ttir.relu"(%1019, %1020) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1022 = tensor.empty() : tensor<1x64xf32>
+    %1023 = "ttir.relu"(%1021, %1022) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1024 = tensor.empty() : tensor<1x64xf32>
+    %1025 = "ttir.relu"(%1023, %1024) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1026 = tensor.empty() : tensor<1x64xf32>
+    %1027 = "ttir.relu"(%1025, %1026) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1028 = tensor.empty() : tensor<1x64xf32>
+    %1029 = "ttir.relu"(%1027, %1028) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1030 = tensor.empty() : tensor<1x64xf32>
+    %1031 = "ttir.relu"(%1029, %1030) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1032 = tensor.empty() : tensor<1x64xf32>
+    %1033 = "ttir.relu"(%1031, %1032) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1034 = tensor.empty() : tensor<1x64xf32>
+    %1035 = "ttir.relu"(%1033, %1034) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1036 = tensor.empty() : tensor<1x64xf32>
+    %1037 = "ttir.relu"(%1035, %1036) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1038 = tensor.empty() : tensor<1x64xf32>
+    %1039 = "ttir.relu"(%1037, %1038) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1040 = tensor.empty() : tensor<1x64xf32>
+    %1041 = "ttir.relu"(%1039, %1040) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1042 = tensor.empty() : tensor<1x64xf32>
+    %1043 = "ttir.relu"(%1041, %1042) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1044 = tensor.empty() : tensor<1x64xf32>
+    %1045 = "ttir.relu"(%1043, %1044) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1046 = tensor.empty() : tensor<1x64xf32>
+    %1047 = "ttir.relu"(%1045, %1046) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1048 = tensor.empty() : tensor<1x64xf32>
+    %1049 = "ttir.relu"(%1047, %1048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1050 = tensor.empty() : tensor<1x64xf32>
+    %1051 = "ttir.relu"(%1049, %1050) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1052 = tensor.empty() : tensor<1x64xf32>
+    %1053 = "ttir.relu"(%1051, %1052) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1054 = tensor.empty() : tensor<1x64xf32>
+    %1055 = "ttir.relu"(%1053, %1054) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1056 = tensor.empty() : tensor<1x64xf32>
+    %1057 = "ttir.relu"(%1055, %1056) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1058 = tensor.empty() : tensor<1x64xf32>
+    %1059 = "ttir.relu"(%1057, %1058) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1060 = tensor.empty() : tensor<1x64xf32>
+    %1061 = "ttir.relu"(%1059, %1060) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1062 = tensor.empty() : tensor<1x64xf32>
+    %1063 = "ttir.relu"(%1061, %1062) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1064 = tensor.empty() : tensor<1x64xf32>
+    %1065 = "ttir.relu"(%1063, %1064) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1066 = tensor.empty() : tensor<1x64xf32>
+    %1067 = "ttir.relu"(%1065, %1066) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1068 = tensor.empty() : tensor<1x64xf32>
+    %1069 = "ttir.relu"(%1067, %1068) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1070 = tensor.empty() : tensor<1x64xf32>
+    %1071 = "ttir.relu"(%1069, %1070) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1072 = tensor.empty() : tensor<1x64xf32>
+    %1073 = "ttir.relu"(%1071, %1072) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1074 = tensor.empty() : tensor<1x64xf32>
+    %1075 = "ttir.relu"(%1073, %1074) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1076 = tensor.empty() : tensor<1x64xf32>
+    %1077 = "ttir.relu"(%1075, %1076) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1078 = tensor.empty() : tensor<1x64xf32>
+    %1079 = "ttir.relu"(%1077, %1078) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1080 = tensor.empty() : tensor<1x64xf32>
+    %1081 = "ttir.relu"(%1079, %1080) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1082 = tensor.empty() : tensor<1x64xf32>
+    %1083 = "ttir.relu"(%1081, %1082) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1084 = tensor.empty() : tensor<1x64xf32>
+    %1085 = "ttir.relu"(%1083, %1084) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1086 = tensor.empty() : tensor<1x64xf32>
+    %1087 = "ttir.relu"(%1085, %1086) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1088 = tensor.empty() : tensor<1x64xf32>
+    %1089 = "ttir.relu"(%1087, %1088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1090 = tensor.empty() : tensor<1x64xf32>
+    %1091 = "ttir.relu"(%1089, %1090) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1092 = tensor.empty() : tensor<1x64xf32>
+    %1093 = "ttir.relu"(%1091, %1092) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1094 = tensor.empty() : tensor<1x64xf32>
+    %1095 = "ttir.relu"(%1093, %1094) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1096 = tensor.empty() : tensor<1x64xf32>
+    %1097 = "ttir.relu"(%1095, %1096) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1098 = tensor.empty() : tensor<1x64xf32>
+    %1099 = "ttir.relu"(%1097, %1098) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1100 = tensor.empty() : tensor<1x64xf32>
+    %1101 = "ttir.relu"(%1099, %1100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1102 = tensor.empty() : tensor<1x64xf32>
+    %1103 = "ttir.relu"(%1101, %1102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1104 = tensor.empty() : tensor<1x64xf32>
+    %1105 = "ttir.relu"(%1103, %1104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1106 = tensor.empty() : tensor<1x64xf32>
+    %1107 = "ttir.relu"(%1105, %1106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1108 = tensor.empty() : tensor<1x64xf32>
+    %1109 = "ttir.relu"(%1107, %1108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1110 = tensor.empty() : tensor<1x64xf32>
+    %1111 = "ttir.relu"(%1109, %1110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1112 = tensor.empty() : tensor<1x64xf32>
+    %1113 = "ttir.relu"(%1111, %1112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1114 = tensor.empty() : tensor<1x64xf32>
+    %1115 = "ttir.relu"(%1113, %1114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1116 = tensor.empty() : tensor<1x64xf32>
+    %1117 = "ttir.relu"(%1115, %1116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1118 = tensor.empty() : tensor<1x64xf32>
+    %1119 = "ttir.relu"(%1117, %1118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1120 = tensor.empty() : tensor<1x64xf32>
+    %1121 = "ttir.relu"(%1119, %1120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1122 = tensor.empty() : tensor<1x64xf32>
+    %1123 = "ttir.relu"(%1121, %1122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1124 = tensor.empty() : tensor<1x64xf32>
+    %1125 = "ttir.relu"(%1123, %1124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1126 = tensor.empty() : tensor<1x64xf32>
+    %1127 = "ttir.relu"(%1125, %1126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1128 = tensor.empty() : tensor<1x64xf32>
+    %1129 = "ttir.relu"(%1127, %1128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1130 = tensor.empty() : tensor<1x64xf32>
+    %1131 = "ttir.relu"(%1129, %1130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1132 = tensor.empty() : tensor<1x64xf32>
+    %1133 = "ttir.relu"(%1131, %1132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1134 = tensor.empty() : tensor<1x64xf32>
+    %1135 = "ttir.relu"(%1133, %1134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1136 = tensor.empty() : tensor<1x64xf32>
+    %1137 = "ttir.relu"(%1135, %1136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1138 = tensor.empty() : tensor<1x64xf32>
+    %1139 = "ttir.relu"(%1137, %1138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1140 = tensor.empty() : tensor<1x64xf32>
+    %1141 = "ttir.relu"(%1139, %1140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1142 = tensor.empty() : tensor<1x64xf32>
+    %1143 = "ttir.relu"(%1141, %1142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1144 = tensor.empty() : tensor<1x64xf32>
+    %1145 = "ttir.relu"(%1143, %1144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1146 = tensor.empty() : tensor<1x64xf32>
+    %1147 = "ttir.relu"(%1145, %1146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1148 = tensor.empty() : tensor<1x64xf32>
+    %1149 = "ttir.relu"(%1147, %1148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1150 = tensor.empty() : tensor<1x64xf32>
+    %1151 = "ttir.relu"(%1149, %1150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1152 = tensor.empty() : tensor<1x64xf32>
+    %1153 = "ttir.relu"(%1151, %1152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1154 = tensor.empty() : tensor<1x64xf32>
+    %1155 = "ttir.relu"(%1153, %1154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1156 = tensor.empty() : tensor<1x64xf32>
+    %1157 = "ttir.relu"(%1155, %1156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1158 = tensor.empty() : tensor<1x64xf32>
+    %1159 = "ttir.relu"(%1157, %1158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1160 = tensor.empty() : tensor<1x64xf32>
+    %1161 = "ttir.relu"(%1159, %1160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1162 = tensor.empty() : tensor<1x64xf32>
+    %1163 = "ttir.relu"(%1161, %1162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1164 = tensor.empty() : tensor<1x64xf32>
+    %1165 = "ttir.relu"(%1163, %1164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1166 = tensor.empty() : tensor<1x64xf32>
+    %1167 = "ttir.relu"(%1165, %1166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1168 = tensor.empty() : tensor<1x64xf32>
+    %1169 = "ttir.relu"(%1167, %1168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1170 = tensor.empty() : tensor<1x64xf32>
+    %1171 = "ttir.relu"(%1169, %1170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1172 = tensor.empty() : tensor<1x64xf32>
+    %1173 = "ttir.relu"(%1171, %1172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1174 = tensor.empty() : tensor<1x64xf32>
+    %1175 = "ttir.relu"(%1173, %1174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1176 = tensor.empty() : tensor<1x64xf32>
+    %1177 = "ttir.relu"(%1175, %1176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1178 = tensor.empty() : tensor<1x64xf32>
+    %1179 = "ttir.relu"(%1177, %1178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1180 = tensor.empty() : tensor<1x64xf32>
+    %1181 = "ttir.relu"(%1179, %1180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1182 = tensor.empty() : tensor<1x64xf32>
+    %1183 = "ttir.relu"(%1181, %1182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1184 = tensor.empty() : tensor<1x64xf32>
+    %1185 = "ttir.relu"(%1183, %1184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1186 = tensor.empty() : tensor<1x64xf32>
+    %1187 = "ttir.relu"(%1185, %1186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1188 = tensor.empty() : tensor<1x64xf32>
+    %1189 = "ttir.relu"(%1187, %1188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1190 = tensor.empty() : tensor<1x64xf32>
+    %1191 = "ttir.relu"(%1189, %1190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1192 = tensor.empty() : tensor<1x64xf32>
+    %1193 = "ttir.relu"(%1191, %1192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1194 = tensor.empty() : tensor<1x64xf32>
+    %1195 = "ttir.relu"(%1193, %1194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1196 = tensor.empty() : tensor<1x64xf32>
+    %1197 = "ttir.relu"(%1195, %1196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1198 = tensor.empty() : tensor<1x64xf32>
+    %1199 = "ttir.relu"(%1197, %1198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1200 = tensor.empty() : tensor<1x64xf32>
+    %1201 = "ttir.relu"(%1199, %1200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1202 = tensor.empty() : tensor<1x64xf32>
+    %1203 = "ttir.relu"(%1201, %1202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1204 = tensor.empty() : tensor<1x64xf32>
+    %1205 = "ttir.relu"(%1203, %1204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1206 = tensor.empty() : tensor<1x64xf32>
+    %1207 = "ttir.relu"(%1205, %1206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1208 = tensor.empty() : tensor<1x64xf32>
+    %1209 = "ttir.relu"(%1207, %1208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1210 = tensor.empty() : tensor<1x64xf32>
+    %1211 = "ttir.relu"(%1209, %1210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1212 = tensor.empty() : tensor<1x64xf32>
+    %1213 = "ttir.relu"(%1211, %1212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1214 = tensor.empty() : tensor<1x64xf32>
+    %1215 = "ttir.relu"(%1213, %1214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1216 = tensor.empty() : tensor<1x64xf32>
+    %1217 = "ttir.relu"(%1215, %1216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1218 = tensor.empty() : tensor<1x64xf32>
+    %1219 = "ttir.relu"(%1217, %1218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1220 = tensor.empty() : tensor<1x64xf32>
+    %1221 = "ttir.relu"(%1219, %1220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1222 = tensor.empty() : tensor<1x64xf32>
+    %1223 = "ttir.relu"(%1221, %1222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1224 = tensor.empty() : tensor<1x64xf32>
+    %1225 = "ttir.relu"(%1223, %1224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1226 = tensor.empty() : tensor<1x64xf32>
+    %1227 = "ttir.relu"(%1225, %1226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1228 = tensor.empty() : tensor<1x64xf32>
+    %1229 = "ttir.relu"(%1227, %1228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1230 = tensor.empty() : tensor<1x64xf32>
+    %1231 = "ttir.relu"(%1229, %1230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1232 = tensor.empty() : tensor<1x64xf32>
+    %1233 = "ttir.relu"(%1231, %1232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1234 = tensor.empty() : tensor<1x64xf32>
+    %1235 = "ttir.relu"(%1233, %1234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1236 = tensor.empty() : tensor<1x64xf32>
+    %1237 = "ttir.relu"(%1235, %1236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1238 = tensor.empty() : tensor<1x64xf32>
+    %1239 = "ttir.relu"(%1237, %1238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1240 = tensor.empty() : tensor<1x64xf32>
+    %1241 = "ttir.relu"(%1239, %1240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1242 = tensor.empty() : tensor<1x64xf32>
+    %1243 = "ttir.relu"(%1241, %1242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1244 = tensor.empty() : tensor<1x64xf32>
+    %1245 = "ttir.relu"(%1243, %1244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1246 = tensor.empty() : tensor<1x64xf32>
+    %1247 = "ttir.relu"(%1245, %1246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1248 = tensor.empty() : tensor<1x64xf32>
+    %1249 = "ttir.relu"(%1247, %1248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1250 = tensor.empty() : tensor<1x64xf32>
+    %1251 = "ttir.relu"(%1249, %1250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1252 = tensor.empty() : tensor<1x64xf32>
+    %1253 = "ttir.relu"(%1251, %1252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1254 = tensor.empty() : tensor<1x64xf32>
+    %1255 = "ttir.relu"(%1253, %1254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1256 = tensor.empty() : tensor<1x64xf32>
+    %1257 = "ttir.relu"(%1255, %1256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1258 = tensor.empty() : tensor<1x64xf32>
+    %1259 = "ttir.relu"(%1257, %1258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1260 = tensor.empty() : tensor<1x64xf32>
+    %1261 = "ttir.relu"(%1259, %1260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1262 = tensor.empty() : tensor<1x64xf32>
+    %1263 = "ttir.relu"(%1261, %1262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1264 = tensor.empty() : tensor<1x64xf32>
+    %1265 = "ttir.relu"(%1263, %1264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1266 = tensor.empty() : tensor<1x64xf32>
+    %1267 = "ttir.relu"(%1265, %1266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1268 = tensor.empty() : tensor<1x64xf32>
+    %1269 = "ttir.relu"(%1267, %1268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1270 = tensor.empty() : tensor<1x64xf32>
+    %1271 = "ttir.relu"(%1269, %1270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1272 = tensor.empty() : tensor<1x64xf32>
+    %1273 = "ttir.relu"(%1271, %1272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1274 = tensor.empty() : tensor<1x64xf32>
+    %1275 = "ttir.relu"(%1273, %1274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1276 = tensor.empty() : tensor<1x64xf32>
+    %1277 = "ttir.relu"(%1275, %1276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1278 = tensor.empty() : tensor<1x64xf32>
+    %1279 = "ttir.relu"(%1277, %1278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1280 = tensor.empty() : tensor<1x64xf32>
+    %1281 = "ttir.relu"(%1279, %1280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1282 = tensor.empty() : tensor<1x64xf32>
+    %1283 = "ttir.relu"(%1281, %1282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1284 = tensor.empty() : tensor<1x64xf32>
+    %1285 = "ttir.relu"(%1283, %1284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1286 = tensor.empty() : tensor<1x64xf32>
+    %1287 = "ttir.relu"(%1285, %1286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1288 = tensor.empty() : tensor<1x64xf32>
+    %1289 = "ttir.relu"(%1287, %1288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1290 = tensor.empty() : tensor<1x64xf32>
+    %1291 = "ttir.relu"(%1289, %1290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1292 = tensor.empty() : tensor<1x64xf32>
+    %1293 = "ttir.relu"(%1291, %1292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1294 = tensor.empty() : tensor<1x64xf32>
+    %1295 = "ttir.relu"(%1293, %1294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1296 = tensor.empty() : tensor<1x64xf32>
+    %1297 = "ttir.relu"(%1295, %1296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1298 = tensor.empty() : tensor<1x64xf32>
+    %1299 = "ttir.relu"(%1297, %1298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1300 = tensor.empty() : tensor<1x64xf32>
+    %1301 = "ttir.relu"(%1299, %1300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1302 = tensor.empty() : tensor<1x64xf32>
+    %1303 = "ttir.relu"(%1301, %1302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1304 = tensor.empty() : tensor<1x64xf32>
+    %1305 = "ttir.relu"(%1303, %1304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1306 = tensor.empty() : tensor<1x64xf32>
+    %1307 = "ttir.relu"(%1305, %1306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1308 = tensor.empty() : tensor<1x64xf32>
+    %1309 = "ttir.relu"(%1307, %1308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1310 = tensor.empty() : tensor<1x64xf32>
+    %1311 = "ttir.relu"(%1309, %1310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1312 = tensor.empty() : tensor<1x64xf32>
+    %1313 = "ttir.relu"(%1311, %1312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1314 = tensor.empty() : tensor<1x64xf32>
+    %1315 = "ttir.relu"(%1313, %1314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1316 = tensor.empty() : tensor<1x64xf32>
+    %1317 = "ttir.relu"(%1315, %1316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1318 = tensor.empty() : tensor<1x64xf32>
+    %1319 = "ttir.relu"(%1317, %1318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1320 = tensor.empty() : tensor<1x64xf32>
+    %1321 = "ttir.relu"(%1319, %1320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1322 = tensor.empty() : tensor<1x64xf32>
+    %1323 = "ttir.relu"(%1321, %1322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1324 = tensor.empty() : tensor<1x64xf32>
+    %1325 = "ttir.relu"(%1323, %1324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1326 = tensor.empty() : tensor<1x64xf32>
+    %1327 = "ttir.relu"(%1325, %1326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1328 = tensor.empty() : tensor<1x64xf32>
+    %1329 = "ttir.relu"(%1327, %1328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1330 = tensor.empty() : tensor<1x64xf32>
+    %1331 = "ttir.relu"(%1329, %1330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1332 = tensor.empty() : tensor<1x64xf32>
+    %1333 = "ttir.relu"(%1331, %1332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1334 = tensor.empty() : tensor<1x64xf32>
+    %1335 = "ttir.relu"(%1333, %1334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1336 = tensor.empty() : tensor<1x64xf32>
+    %1337 = "ttir.relu"(%1335, %1336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1338 = tensor.empty() : tensor<1x64xf32>
+    %1339 = "ttir.relu"(%1337, %1338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1340 = tensor.empty() : tensor<1x64xf32>
+    %1341 = "ttir.relu"(%1339, %1340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1342 = tensor.empty() : tensor<1x64xf32>
+    %1343 = "ttir.relu"(%1341, %1342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1344 = tensor.empty() : tensor<1x64xf32>
+    %1345 = "ttir.relu"(%1343, %1344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1346 = tensor.empty() : tensor<1x64xf32>
+    %1347 = "ttir.relu"(%1345, %1346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1348 = tensor.empty() : tensor<1x64xf32>
+    %1349 = "ttir.relu"(%1347, %1348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1350 = tensor.empty() : tensor<1x64xf32>
+    %1351 = "ttir.relu"(%1349, %1350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1352 = tensor.empty() : tensor<1x64xf32>
+    %1353 = "ttir.relu"(%1351, %1352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1354 = tensor.empty() : tensor<1x64xf32>
+    %1355 = "ttir.relu"(%1353, %1354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1356 = tensor.empty() : tensor<1x64xf32>
+    %1357 = "ttir.relu"(%1355, %1356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1358 = tensor.empty() : tensor<1x64xf32>
+    %1359 = "ttir.relu"(%1357, %1358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1360 = tensor.empty() : tensor<1x64xf32>
+    %1361 = "ttir.relu"(%1359, %1360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1362 = tensor.empty() : tensor<1x64xf32>
+    %1363 = "ttir.relu"(%1361, %1362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1364 = tensor.empty() : tensor<1x64xf32>
+    %1365 = "ttir.relu"(%1363, %1364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1366 = tensor.empty() : tensor<1x64xf32>
+    %1367 = "ttir.relu"(%1365, %1366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1368 = tensor.empty() : tensor<1x64xf32>
+    %1369 = "ttir.relu"(%1367, %1368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1370 = tensor.empty() : tensor<1x64xf32>
+    %1371 = "ttir.relu"(%1369, %1370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1372 = tensor.empty() : tensor<1x64xf32>
+    %1373 = "ttir.relu"(%1371, %1372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1374 = tensor.empty() : tensor<1x64xf32>
+    %1375 = "ttir.relu"(%1373, %1374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1376 = tensor.empty() : tensor<1x64xf32>
+    %1377 = "ttir.relu"(%1375, %1376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1378 = tensor.empty() : tensor<1x64xf32>
+    %1379 = "ttir.relu"(%1377, %1378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1380 = tensor.empty() : tensor<1x64xf32>
+    %1381 = "ttir.relu"(%1379, %1380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1382 = tensor.empty() : tensor<1x64xf32>
+    %1383 = "ttir.relu"(%1381, %1382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1384 = tensor.empty() : tensor<1x64xf32>
+    %1385 = "ttir.relu"(%1383, %1384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1386 = tensor.empty() : tensor<1x64xf32>
+    %1387 = "ttir.relu"(%1385, %1386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1388 = tensor.empty() : tensor<1x64xf32>
+    %1389 = "ttir.relu"(%1387, %1388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1390 = tensor.empty() : tensor<1x64xf32>
+    %1391 = "ttir.relu"(%1389, %1390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1392 = tensor.empty() : tensor<1x64xf32>
+    %1393 = "ttir.relu"(%1391, %1392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1394 = tensor.empty() : tensor<1x64xf32>
+    %1395 = "ttir.relu"(%1393, %1394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1396 = tensor.empty() : tensor<1x64xf32>
+    %1397 = "ttir.relu"(%1395, %1396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1398 = tensor.empty() : tensor<1x64xf32>
+    %1399 = "ttir.relu"(%1397, %1398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1400 = tensor.empty() : tensor<1x64xf32>
+    %1401 = "ttir.relu"(%1399, %1400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1402 = tensor.empty() : tensor<1x64xf32>
+    %1403 = "ttir.relu"(%1401, %1402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1404 = tensor.empty() : tensor<1x64xf32>
+    %1405 = "ttir.relu"(%1403, %1404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1406 = tensor.empty() : tensor<1x64xf32>
+    %1407 = "ttir.relu"(%1405, %1406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1408 = tensor.empty() : tensor<1x64xf32>
+    %1409 = "ttir.relu"(%1407, %1408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1410 = tensor.empty() : tensor<1x64xf32>
+    %1411 = "ttir.relu"(%1409, %1410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1412 = tensor.empty() : tensor<1x64xf32>
+    %1413 = "ttir.relu"(%1411, %1412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1414 = tensor.empty() : tensor<1x64xf32>
+    %1415 = "ttir.relu"(%1413, %1414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1416 = tensor.empty() : tensor<1x64xf32>
+    %1417 = "ttir.relu"(%1415, %1416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1418 = tensor.empty() : tensor<1x64xf32>
+    %1419 = "ttir.relu"(%1417, %1418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1420 = tensor.empty() : tensor<1x64xf32>
+    %1421 = "ttir.relu"(%1419, %1420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1422 = tensor.empty() : tensor<1x64xf32>
+    %1423 = "ttir.relu"(%1421, %1422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1424 = tensor.empty() : tensor<1x64xf32>
+    %1425 = "ttir.relu"(%1423, %1424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1426 = tensor.empty() : tensor<1x64xf32>
+    %1427 = "ttir.relu"(%1425, %1426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1428 = tensor.empty() : tensor<1x64xf32>
+    %1429 = "ttir.relu"(%1427, %1428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1430 = tensor.empty() : tensor<1x64xf32>
+    %1431 = "ttir.relu"(%1429, %1430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1432 = tensor.empty() : tensor<1x64xf32>
+    %1433 = "ttir.relu"(%1431, %1432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1434 = tensor.empty() : tensor<1x64xf32>
+    %1435 = "ttir.relu"(%1433, %1434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1436 = tensor.empty() : tensor<1x64xf32>
+    %1437 = "ttir.relu"(%1435, %1436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1438 = tensor.empty() : tensor<1x64xf32>
+    %1439 = "ttir.relu"(%1437, %1438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1440 = tensor.empty() : tensor<1x64xf32>
+    %1441 = "ttir.relu"(%1439, %1440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1442 = tensor.empty() : tensor<1x64xf32>
+    %1443 = "ttir.relu"(%1441, %1442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1444 = tensor.empty() : tensor<1x64xf32>
+    %1445 = "ttir.relu"(%1443, %1444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1446 = tensor.empty() : tensor<1x64xf32>
+    %1447 = "ttir.relu"(%1445, %1446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1448 = tensor.empty() : tensor<1x64xf32>
+    %1449 = "ttir.relu"(%1447, %1448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1450 = tensor.empty() : tensor<1x64xf32>
+    %1451 = "ttir.relu"(%1449, %1450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1452 = tensor.empty() : tensor<1x64xf32>
+    %1453 = "ttir.relu"(%1451, %1452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1454 = tensor.empty() : tensor<1x64xf32>
+    %1455 = "ttir.relu"(%1453, %1454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1456 = tensor.empty() : tensor<1x64xf32>
+    %1457 = "ttir.relu"(%1455, %1456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1458 = tensor.empty() : tensor<1x64xf32>
+    %1459 = "ttir.relu"(%1457, %1458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1460 = tensor.empty() : tensor<1x64xf32>
+    %1461 = "ttir.relu"(%1459, %1460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1462 = tensor.empty() : tensor<1x64xf32>
+    %1463 = "ttir.relu"(%1461, %1462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1464 = tensor.empty() : tensor<1x64xf32>
+    %1465 = "ttir.relu"(%1463, %1464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1466 = tensor.empty() : tensor<1x64xf32>
+    %1467 = "ttir.relu"(%1465, %1466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1468 = tensor.empty() : tensor<1x64xf32>
+    %1469 = "ttir.relu"(%1467, %1468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1470 = tensor.empty() : tensor<1x64xf32>
+    %1471 = "ttir.relu"(%1469, %1470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1472 = tensor.empty() : tensor<1x64xf32>
+    %1473 = "ttir.relu"(%1471, %1472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1474 = tensor.empty() : tensor<1x64xf32>
+    %1475 = "ttir.relu"(%1473, %1474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1476 = tensor.empty() : tensor<1x64xf32>
+    %1477 = "ttir.relu"(%1475, %1476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1478 = tensor.empty() : tensor<1x64xf32>
+    %1479 = "ttir.relu"(%1477, %1478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1480 = tensor.empty() : tensor<1x64xf32>
+    %1481 = "ttir.relu"(%1479, %1480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1482 = tensor.empty() : tensor<1x64xf32>
+    %1483 = "ttir.relu"(%1481, %1482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1484 = tensor.empty() : tensor<1x64xf32>
+    %1485 = "ttir.relu"(%1483, %1484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1486 = tensor.empty() : tensor<1x64xf32>
+    %1487 = "ttir.relu"(%1485, %1486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1488 = tensor.empty() : tensor<1x64xf32>
+    %1489 = "ttir.relu"(%1487, %1488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1490 = tensor.empty() : tensor<1x64xf32>
+    %1491 = "ttir.relu"(%1489, %1490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1492 = tensor.empty() : tensor<1x64xf32>
+    %1493 = "ttir.relu"(%1491, %1492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1494 = tensor.empty() : tensor<1x64xf32>
+    %1495 = "ttir.relu"(%1493, %1494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1496 = tensor.empty() : tensor<1x64xf32>
+    %1497 = "ttir.relu"(%1495, %1496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1498 = tensor.empty() : tensor<1x64xf32>
+    %1499 = "ttir.relu"(%1497, %1498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1500 = tensor.empty() : tensor<1x64xf32>
+    %1501 = "ttir.relu"(%1499, %1500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1502 = tensor.empty() : tensor<1x64xf32>
+    %1503 = "ttir.relu"(%1501, %1502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1504 = tensor.empty() : tensor<1x64xf32>
+    %1505 = "ttir.relu"(%1503, %1504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1506 = tensor.empty() : tensor<1x64xf32>
+    %1507 = "ttir.relu"(%1505, %1506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1508 = tensor.empty() : tensor<1x64xf32>
+    %1509 = "ttir.relu"(%1507, %1508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1510 = tensor.empty() : tensor<1x64xf32>
+    %1511 = "ttir.relu"(%1509, %1510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1512 = tensor.empty() : tensor<1x64xf32>
+    %1513 = "ttir.relu"(%1511, %1512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1514 = tensor.empty() : tensor<1x64xf32>
+    %1515 = "ttir.relu"(%1513, %1514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1516 = tensor.empty() : tensor<1x64xf32>
+    %1517 = "ttir.relu"(%1515, %1516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1518 = tensor.empty() : tensor<1x64xf32>
+    %1519 = "ttir.relu"(%1517, %1518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1520 = tensor.empty() : tensor<1x64xf32>
+    %1521 = "ttir.relu"(%1519, %1520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1522 = tensor.empty() : tensor<1x64xf32>
+    %1523 = "ttir.relu"(%1521, %1522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1524 = tensor.empty() : tensor<1x64xf32>
+    %1525 = "ttir.relu"(%1523, %1524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1526 = tensor.empty() : tensor<1x64xf32>
+    %1527 = "ttir.relu"(%1525, %1526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1528 = tensor.empty() : tensor<1x64xf32>
+    %1529 = "ttir.relu"(%1527, %1528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1530 = tensor.empty() : tensor<1x64xf32>
+    %1531 = "ttir.relu"(%1529, %1530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1532 = tensor.empty() : tensor<1x64xf32>
+    %1533 = "ttir.relu"(%1531, %1532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1534 = tensor.empty() : tensor<1x64xf32>
+    %1535 = "ttir.relu"(%1533, %1534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1536 = tensor.empty() : tensor<1x64xf32>
+    %1537 = "ttir.relu"(%1535, %1536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1538 = tensor.empty() : tensor<1x64xf32>
+    %1539 = "ttir.relu"(%1537, %1538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1540 = tensor.empty() : tensor<1x64xf32>
+    %1541 = "ttir.relu"(%1539, %1540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1542 = tensor.empty() : tensor<1x64xf32>
+    %1543 = "ttir.relu"(%1541, %1542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1544 = tensor.empty() : tensor<1x64xf32>
+    %1545 = "ttir.relu"(%1543, %1544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1546 = tensor.empty() : tensor<1x64xf32>
+    %1547 = "ttir.relu"(%1545, %1546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1548 = tensor.empty() : tensor<1x64xf32>
+    %1549 = "ttir.relu"(%1547, %1548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1550 = tensor.empty() : tensor<1x64xf32>
+    %1551 = "ttir.relu"(%1549, %1550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1552 = tensor.empty() : tensor<1x64xf32>
+    %1553 = "ttir.relu"(%1551, %1552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1554 = tensor.empty() : tensor<1x64xf32>
+    %1555 = "ttir.relu"(%1553, %1554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1556 = tensor.empty() : tensor<1x64xf32>
+    %1557 = "ttir.relu"(%1555, %1556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1558 = tensor.empty() : tensor<1x64xf32>
+    %1559 = "ttir.relu"(%1557, %1558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1560 = tensor.empty() : tensor<1x64xf32>
+    %1561 = "ttir.relu"(%1559, %1560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1562 = tensor.empty() : tensor<1x64xf32>
+    %1563 = "ttir.relu"(%1561, %1562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1564 = tensor.empty() : tensor<1x64xf32>
+    %1565 = "ttir.relu"(%1563, %1564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1566 = tensor.empty() : tensor<1x64xf32>
+    %1567 = "ttir.relu"(%1565, %1566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1568 = tensor.empty() : tensor<1x64xf32>
+    %1569 = "ttir.relu"(%1567, %1568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1570 = tensor.empty() : tensor<1x64xf32>
+    %1571 = "ttir.relu"(%1569, %1570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1572 = tensor.empty() : tensor<1x64xf32>
+    %1573 = "ttir.relu"(%1571, %1572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1574 = tensor.empty() : tensor<1x64xf32>
+    %1575 = "ttir.relu"(%1573, %1574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1576 = tensor.empty() : tensor<1x64xf32>
+    %1577 = "ttir.relu"(%1575, %1576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1578 = tensor.empty() : tensor<1x64xf32>
+    %1579 = "ttir.relu"(%1577, %1578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1580 = tensor.empty() : tensor<1x64xf32>
+    %1581 = "ttir.relu"(%1579, %1580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1582 = tensor.empty() : tensor<1x64xf32>
+    %1583 = "ttir.relu"(%1581, %1582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1584 = tensor.empty() : tensor<1x64xf32>
+    %1585 = "ttir.relu"(%1583, %1584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1586 = tensor.empty() : tensor<1x64xf32>
+    %1587 = "ttir.relu"(%1585, %1586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1588 = tensor.empty() : tensor<1x64xf32>
+    %1589 = "ttir.relu"(%1587, %1588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1590 = tensor.empty() : tensor<1x64xf32>
+    %1591 = "ttir.relu"(%1589, %1590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1592 = tensor.empty() : tensor<1x64xf32>
+    %1593 = "ttir.relu"(%1591, %1592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1594 = tensor.empty() : tensor<1x64xf32>
+    %1595 = "ttir.relu"(%1593, %1594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1596 = tensor.empty() : tensor<1x64xf32>
+    %1597 = "ttir.relu"(%1595, %1596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1598 = tensor.empty() : tensor<1x64xf32>
+    %1599 = "ttir.relu"(%1597, %1598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1600 = tensor.empty() : tensor<1x64xf32>
+    %1601 = "ttir.relu"(%1599, %1600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1602 = tensor.empty() : tensor<1x64xf32>
+    %1603 = "ttir.relu"(%1601, %1602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1604 = tensor.empty() : tensor<1x64xf32>
+    %1605 = "ttir.relu"(%1603, %1604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1606 = tensor.empty() : tensor<1x64xf32>
+    %1607 = "ttir.relu"(%1605, %1606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1608 = tensor.empty() : tensor<1x64xf32>
+    %1609 = "ttir.relu"(%1607, %1608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1610 = tensor.empty() : tensor<1x64xf32>
+    %1611 = "ttir.relu"(%1609, %1610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1612 = tensor.empty() : tensor<1x64xf32>
+    %1613 = "ttir.relu"(%1611, %1612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1614 = tensor.empty() : tensor<1x64xf32>
+    %1615 = "ttir.relu"(%1613, %1614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1616 = tensor.empty() : tensor<1x64xf32>
+    %1617 = "ttir.relu"(%1615, %1616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1618 = tensor.empty() : tensor<1x64xf32>
+    %1619 = "ttir.relu"(%1617, %1618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1620 = tensor.empty() : tensor<1x64xf32>
+    %1621 = "ttir.relu"(%1619, %1620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1622 = tensor.empty() : tensor<1x64xf32>
+    %1623 = "ttir.relu"(%1621, %1622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1624 = tensor.empty() : tensor<1x64xf32>
+    %1625 = "ttir.relu"(%1623, %1624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1626 = tensor.empty() : tensor<1x64xf32>
+    %1627 = "ttir.relu"(%1625, %1626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1628 = tensor.empty() : tensor<1x64xf32>
+    %1629 = "ttir.relu"(%1627, %1628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1630 = tensor.empty() : tensor<1x64xf32>
+    %1631 = "ttir.relu"(%1629, %1630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1632 = tensor.empty() : tensor<1x64xf32>
+    %1633 = "ttir.relu"(%1631, %1632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1634 = tensor.empty() : tensor<1x64xf32>
+    %1635 = "ttir.relu"(%1633, %1634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1636 = tensor.empty() : tensor<1x64xf32>
+    %1637 = "ttir.relu"(%1635, %1636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1638 = tensor.empty() : tensor<1x64xf32>
+    %1639 = "ttir.relu"(%1637, %1638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1640 = tensor.empty() : tensor<1x64xf32>
+    %1641 = "ttir.relu"(%1639, %1640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1642 = tensor.empty() : tensor<1x64xf32>
+    %1643 = "ttir.relu"(%1641, %1642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1644 = tensor.empty() : tensor<1x64xf32>
+    %1645 = "ttir.relu"(%1643, %1644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1646 = tensor.empty() : tensor<1x64xf32>
+    %1647 = "ttir.relu"(%1645, %1646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1648 = tensor.empty() : tensor<1x64xf32>
+    %1649 = "ttir.relu"(%1647, %1648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1650 = tensor.empty() : tensor<1x64xf32>
+    %1651 = "ttir.relu"(%1649, %1650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1652 = tensor.empty() : tensor<1x64xf32>
+    %1653 = "ttir.relu"(%1651, %1652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1654 = tensor.empty() : tensor<1x64xf32>
+    %1655 = "ttir.relu"(%1653, %1654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1656 = tensor.empty() : tensor<1x64xf32>
+    %1657 = "ttir.relu"(%1655, %1656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1658 = tensor.empty() : tensor<1x64xf32>
+    %1659 = "ttir.relu"(%1657, %1658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1660 = tensor.empty() : tensor<1x64xf32>
+    %1661 = "ttir.relu"(%1659, %1660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1662 = tensor.empty() : tensor<1x64xf32>
+    %1663 = "ttir.relu"(%1661, %1662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1664 = tensor.empty() : tensor<1x64xf32>
+    %1665 = "ttir.relu"(%1663, %1664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1666 = tensor.empty() : tensor<1x64xf32>
+    %1667 = "ttir.relu"(%1665, %1666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1668 = tensor.empty() : tensor<1x64xf32>
+    %1669 = "ttir.relu"(%1667, %1668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1670 = tensor.empty() : tensor<1x64xf32>
+    %1671 = "ttir.relu"(%1669, %1670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1672 = tensor.empty() : tensor<1x64xf32>
+    %1673 = "ttir.relu"(%1671, %1672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1674 = tensor.empty() : tensor<1x64xf32>
+    %1675 = "ttir.relu"(%1673, %1674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1676 = tensor.empty() : tensor<1x64xf32>
+    %1677 = "ttir.relu"(%1675, %1676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1678 = tensor.empty() : tensor<1x64xf32>
+    %1679 = "ttir.relu"(%1677, %1678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1680 = tensor.empty() : tensor<1x64xf32>
+    %1681 = "ttir.relu"(%1679, %1680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1682 = tensor.empty() : tensor<1x64xf32>
+    %1683 = "ttir.relu"(%1681, %1682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1684 = tensor.empty() : tensor<1x64xf32>
+    %1685 = "ttir.relu"(%1683, %1684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1686 = tensor.empty() : tensor<1x64xf32>
+    %1687 = "ttir.relu"(%1685, %1686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1688 = tensor.empty() : tensor<1x64xf32>
+    %1689 = "ttir.relu"(%1687, %1688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1690 = tensor.empty() : tensor<1x64xf32>
+    %1691 = "ttir.relu"(%1689, %1690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1692 = tensor.empty() : tensor<1x64xf32>
+    %1693 = "ttir.relu"(%1691, %1692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1694 = tensor.empty() : tensor<1x64xf32>
+    %1695 = "ttir.relu"(%1693, %1694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1696 = tensor.empty() : tensor<1x64xf32>
+    %1697 = "ttir.relu"(%1695, %1696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1698 = tensor.empty() : tensor<1x64xf32>
+    %1699 = "ttir.relu"(%1697, %1698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1700 = tensor.empty() : tensor<1x64xf32>
+    %1701 = "ttir.relu"(%1699, %1700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1702 = tensor.empty() : tensor<1x64xf32>
+    %1703 = "ttir.relu"(%1701, %1702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1704 = tensor.empty() : tensor<1x64xf32>
+    %1705 = "ttir.relu"(%1703, %1704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1706 = tensor.empty() : tensor<1x64xf32>
+    %1707 = "ttir.relu"(%1705, %1706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1708 = tensor.empty() : tensor<1x64xf32>
+    %1709 = "ttir.relu"(%1707, %1708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1710 = tensor.empty() : tensor<1x64xf32>
+    %1711 = "ttir.relu"(%1709, %1710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1712 = tensor.empty() : tensor<1x64xf32>
+    %1713 = "ttir.relu"(%1711, %1712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1714 = tensor.empty() : tensor<1x64xf32>
+    %1715 = "ttir.relu"(%1713, %1714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1716 = tensor.empty() : tensor<1x64xf32>
+    %1717 = "ttir.relu"(%1715, %1716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1718 = tensor.empty() : tensor<1x64xf32>
+    %1719 = "ttir.relu"(%1717, %1718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1720 = tensor.empty() : tensor<1x64xf32>
+    %1721 = "ttir.relu"(%1719, %1720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1722 = tensor.empty() : tensor<1x64xf32>
+    %1723 = "ttir.relu"(%1721, %1722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1724 = tensor.empty() : tensor<1x64xf32>
+    %1725 = "ttir.relu"(%1723, %1724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1726 = tensor.empty() : tensor<1x64xf32>
+    %1727 = "ttir.relu"(%1725, %1726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1728 = tensor.empty() : tensor<1x64xf32>
+    %1729 = "ttir.relu"(%1727, %1728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1730 = tensor.empty() : tensor<1x64xf32>
+    %1731 = "ttir.relu"(%1729, %1730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1732 = tensor.empty() : tensor<1x64xf32>
+    %1733 = "ttir.relu"(%1731, %1732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1734 = tensor.empty() : tensor<1x64xf32>
+    %1735 = "ttir.relu"(%1733, %1734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1736 = tensor.empty() : tensor<1x64xf32>
+    %1737 = "ttir.relu"(%1735, %1736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1738 = tensor.empty() : tensor<1x64xf32>
+    %1739 = "ttir.relu"(%1737, %1738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1740 = tensor.empty() : tensor<1x64xf32>
+    %1741 = "ttir.relu"(%1739, %1740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1742 = tensor.empty() : tensor<1x64xf32>
+    %1743 = "ttir.relu"(%1741, %1742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1744 = tensor.empty() : tensor<1x64xf32>
+    %1745 = "ttir.relu"(%1743, %1744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1746 = tensor.empty() : tensor<1x64xf32>
+    %1747 = "ttir.relu"(%1745, %1746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1748 = tensor.empty() : tensor<1x64xf32>
+    %1749 = "ttir.relu"(%1747, %1748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1750 = tensor.empty() : tensor<1x64xf32>
+    %1751 = "ttir.relu"(%1749, %1750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1752 = tensor.empty() : tensor<1x64xf32>
+    %1753 = "ttir.relu"(%1751, %1752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1754 = tensor.empty() : tensor<1x64xf32>
+    %1755 = "ttir.relu"(%1753, %1754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1756 = tensor.empty() : tensor<1x64xf32>
+    %1757 = "ttir.relu"(%1755, %1756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1758 = tensor.empty() : tensor<1x64xf32>
+    %1759 = "ttir.relu"(%1757, %1758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1760 = tensor.empty() : tensor<1x64xf32>
+    %1761 = "ttir.relu"(%1759, %1760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1762 = tensor.empty() : tensor<1x64xf32>
+    %1763 = "ttir.relu"(%1761, %1762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1764 = tensor.empty() : tensor<1x64xf32>
+    %1765 = "ttir.relu"(%1763, %1764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1766 = tensor.empty() : tensor<1x64xf32>
+    %1767 = "ttir.relu"(%1765, %1766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1768 = tensor.empty() : tensor<1x64xf32>
+    %1769 = "ttir.relu"(%1767, %1768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1770 = tensor.empty() : tensor<1x64xf32>
+    %1771 = "ttir.relu"(%1769, %1770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1772 = tensor.empty() : tensor<1x64xf32>
+    %1773 = "ttir.relu"(%1771, %1772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1774 = tensor.empty() : tensor<1x64xf32>
+    %1775 = "ttir.relu"(%1773, %1774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1776 = tensor.empty() : tensor<1x64xf32>
+    %1777 = "ttir.relu"(%1775, %1776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1778 = tensor.empty() : tensor<1x64xf32>
+    %1779 = "ttir.relu"(%1777, %1778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1780 = tensor.empty() : tensor<1x64xf32>
+    %1781 = "ttir.relu"(%1779, %1780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1782 = tensor.empty() : tensor<1x64xf32>
+    %1783 = "ttir.relu"(%1781, %1782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1784 = tensor.empty() : tensor<1x64xf32>
+    %1785 = "ttir.relu"(%1783, %1784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1786 = tensor.empty() : tensor<1x64xf32>
+    %1787 = "ttir.relu"(%1785, %1786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1788 = tensor.empty() : tensor<1x64xf32>
+    %1789 = "ttir.relu"(%1787, %1788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1790 = tensor.empty() : tensor<1x64xf32>
+    %1791 = "ttir.relu"(%1789, %1790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1792 = tensor.empty() : tensor<1x64xf32>
+    %1793 = "ttir.relu"(%1791, %1792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1794 = tensor.empty() : tensor<1x64xf32>
+    %1795 = "ttir.relu"(%1793, %1794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1796 = tensor.empty() : tensor<1x64xf32>
+    %1797 = "ttir.relu"(%1795, %1796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1798 = tensor.empty() : tensor<1x64xf32>
+    %1799 = "ttir.relu"(%1797, %1798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1800 = tensor.empty() : tensor<1x64xf32>
+    %1801 = "ttir.relu"(%1799, %1800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1802 = tensor.empty() : tensor<1x64xf32>
+    %1803 = "ttir.relu"(%1801, %1802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1804 = tensor.empty() : tensor<1x64xf32>
+    %1805 = "ttir.relu"(%1803, %1804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1806 = tensor.empty() : tensor<1x64xf32>
+    %1807 = "ttir.relu"(%1805, %1806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1808 = tensor.empty() : tensor<1x64xf32>
+    %1809 = "ttir.relu"(%1807, %1808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1810 = tensor.empty() : tensor<1x64xf32>
+    %1811 = "ttir.relu"(%1809, %1810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1812 = tensor.empty() : tensor<1x64xf32>
+    %1813 = "ttir.relu"(%1811, %1812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1814 = tensor.empty() : tensor<1x64xf32>
+    %1815 = "ttir.relu"(%1813, %1814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1816 = tensor.empty() : tensor<1x64xf32>
+    %1817 = "ttir.relu"(%1815, %1816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1818 = tensor.empty() : tensor<1x64xf32>
+    %1819 = "ttir.relu"(%1817, %1818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1820 = tensor.empty() : tensor<1x64xf32>
+    %1821 = "ttir.relu"(%1819, %1820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1822 = tensor.empty() : tensor<1x64xf32>
+    %1823 = "ttir.relu"(%1821, %1822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1824 = tensor.empty() : tensor<1x64xf32>
+    %1825 = "ttir.relu"(%1823, %1824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1826 = tensor.empty() : tensor<1x64xf32>
+    %1827 = "ttir.relu"(%1825, %1826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1828 = tensor.empty() : tensor<1x64xf32>
+    %1829 = "ttir.relu"(%1827, %1828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1830 = tensor.empty() : tensor<1x64xf32>
+    %1831 = "ttir.relu"(%1829, %1830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1832 = tensor.empty() : tensor<1x64xf32>
+    %1833 = "ttir.relu"(%1831, %1832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1834 = tensor.empty() : tensor<1x64xf32>
+    %1835 = "ttir.relu"(%1833, %1834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1836 = tensor.empty() : tensor<1x64xf32>
+    %1837 = "ttir.relu"(%1835, %1836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1838 = tensor.empty() : tensor<1x64xf32>
+    %1839 = "ttir.relu"(%1837, %1838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1840 = tensor.empty() : tensor<1x64xf32>
+    %1841 = "ttir.relu"(%1839, %1840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1842 = tensor.empty() : tensor<1x64xf32>
+    %1843 = "ttir.relu"(%1841, %1842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1844 = tensor.empty() : tensor<1x64xf32>
+    %1845 = "ttir.relu"(%1843, %1844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1846 = tensor.empty() : tensor<1x64xf32>
+    %1847 = "ttir.relu"(%1845, %1846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1848 = tensor.empty() : tensor<1x64xf32>
+    %1849 = "ttir.relu"(%1847, %1848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1850 = tensor.empty() : tensor<1x64xf32>
+    %1851 = "ttir.relu"(%1849, %1850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1852 = tensor.empty() : tensor<1x64xf32>
+    %1853 = "ttir.relu"(%1851, %1852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1854 = tensor.empty() : tensor<1x64xf32>
+    %1855 = "ttir.relu"(%1853, %1854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1856 = tensor.empty() : tensor<1x64xf32>
+    %1857 = "ttir.relu"(%1855, %1856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1858 = tensor.empty() : tensor<1x64xf32>
+    %1859 = "ttir.relu"(%1857, %1858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1860 = tensor.empty() : tensor<1x64xf32>
+    %1861 = "ttir.relu"(%1859, %1860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1862 = tensor.empty() : tensor<1x64xf32>
+    %1863 = "ttir.relu"(%1861, %1862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1864 = tensor.empty() : tensor<1x64xf32>
+    %1865 = "ttir.relu"(%1863, %1864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1866 = tensor.empty() : tensor<1x64xf32>
+    %1867 = "ttir.relu"(%1865, %1866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1868 = tensor.empty() : tensor<1x64xf32>
+    %1869 = "ttir.relu"(%1867, %1868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1870 = tensor.empty() : tensor<1x64xf32>
+    %1871 = "ttir.relu"(%1869, %1870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1872 = tensor.empty() : tensor<1x64xf32>
+    %1873 = "ttir.relu"(%1871, %1872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1874 = tensor.empty() : tensor<1x64xf32>
+    %1875 = "ttir.relu"(%1873, %1874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1876 = tensor.empty() : tensor<1x64xf32>
+    %1877 = "ttir.relu"(%1875, %1876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1878 = tensor.empty() : tensor<1x64xf32>
+    %1879 = "ttir.relu"(%1877, %1878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1880 = tensor.empty() : tensor<1x64xf32>
+    %1881 = "ttir.relu"(%1879, %1880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1882 = tensor.empty() : tensor<1x64xf32>
+    %1883 = "ttir.relu"(%1881, %1882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1884 = tensor.empty() : tensor<1x64xf32>
+    %1885 = "ttir.relu"(%1883, %1884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1886 = tensor.empty() : tensor<1x64xf32>
+    %1887 = "ttir.relu"(%1885, %1886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1888 = tensor.empty() : tensor<1x64xf32>
+    %1889 = "ttir.relu"(%1887, %1888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1890 = tensor.empty() : tensor<1x64xf32>
+    %1891 = "ttir.relu"(%1889, %1890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1892 = tensor.empty() : tensor<1x64xf32>
+    %1893 = "ttir.relu"(%1891, %1892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1894 = tensor.empty() : tensor<1x64xf32>
+    %1895 = "ttir.relu"(%1893, %1894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1896 = tensor.empty() : tensor<1x64xf32>
+    %1897 = "ttir.relu"(%1895, %1896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1898 = tensor.empty() : tensor<1x64xf32>
+    %1899 = "ttir.relu"(%1897, %1898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1900 = tensor.empty() : tensor<1x64xf32>
+    %1901 = "ttir.relu"(%1899, %1900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1902 = tensor.empty() : tensor<1x64xf32>
+    %1903 = "ttir.relu"(%1901, %1902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1904 = tensor.empty() : tensor<1x64xf32>
+    %1905 = "ttir.relu"(%1903, %1904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1906 = tensor.empty() : tensor<1x64xf32>
+    %1907 = "ttir.relu"(%1905, %1906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1908 = tensor.empty() : tensor<1x64xf32>
+    %1909 = "ttir.relu"(%1907, %1908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1910 = tensor.empty() : tensor<1x64xf32>
+    %1911 = "ttir.relu"(%1909, %1910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1912 = tensor.empty() : tensor<1x64xf32>
+    %1913 = "ttir.relu"(%1911, %1912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1914 = tensor.empty() : tensor<1x64xf32>
+    %1915 = "ttir.relu"(%1913, %1914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1916 = tensor.empty() : tensor<1x64xf32>
+    %1917 = "ttir.relu"(%1915, %1916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1918 = tensor.empty() : tensor<1x64xf32>
+    %1919 = "ttir.relu"(%1917, %1918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1920 = tensor.empty() : tensor<1x64xf32>
+    %1921 = "ttir.relu"(%1919, %1920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1922 = tensor.empty() : tensor<1x64xf32>
+    %1923 = "ttir.relu"(%1921, %1922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1924 = tensor.empty() : tensor<1x64xf32>
+    %1925 = "ttir.relu"(%1923, %1924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1926 = tensor.empty() : tensor<1x64xf32>
+    %1927 = "ttir.relu"(%1925, %1926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1928 = tensor.empty() : tensor<1x64xf32>
+    %1929 = "ttir.relu"(%1927, %1928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1930 = tensor.empty() : tensor<1x64xf32>
+    %1931 = "ttir.relu"(%1929, %1930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1932 = tensor.empty() : tensor<1x64xf32>
+    %1933 = "ttir.relu"(%1931, %1932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1934 = tensor.empty() : tensor<1x64xf32>
+    %1935 = "ttir.relu"(%1933, %1934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1936 = tensor.empty() : tensor<1x64xf32>
+    %1937 = "ttir.relu"(%1935, %1936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1938 = tensor.empty() : tensor<1x64xf32>
+    %1939 = "ttir.relu"(%1937, %1938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1940 = tensor.empty() : tensor<1x64xf32>
+    %1941 = "ttir.relu"(%1939, %1940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1942 = tensor.empty() : tensor<1x64xf32>
+    %1943 = "ttir.relu"(%1941, %1942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1944 = tensor.empty() : tensor<1x64xf32>
+    %1945 = "ttir.relu"(%1943, %1944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1946 = tensor.empty() : tensor<1x64xf32>
+    %1947 = "ttir.relu"(%1945, %1946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1948 = tensor.empty() : tensor<1x64xf32>
+    %1949 = "ttir.relu"(%1947, %1948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1950 = tensor.empty() : tensor<1x64xf32>
+    %1951 = "ttir.relu"(%1949, %1950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1952 = tensor.empty() : tensor<1x64xf32>
+    %1953 = "ttir.relu"(%1951, %1952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1954 = tensor.empty() : tensor<1x64xf32>
+    %1955 = "ttir.relu"(%1953, %1954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1956 = tensor.empty() : tensor<1x64xf32>
+    %1957 = "ttir.relu"(%1955, %1956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1958 = tensor.empty() : tensor<1x64xf32>
+    %1959 = "ttir.relu"(%1957, %1958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1960 = tensor.empty() : tensor<1x64xf32>
+    %1961 = "ttir.relu"(%1959, %1960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1962 = tensor.empty() : tensor<1x64xf32>
+    %1963 = "ttir.relu"(%1961, %1962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1964 = tensor.empty() : tensor<1x64xf32>
+    %1965 = "ttir.relu"(%1963, %1964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1966 = tensor.empty() : tensor<1x64xf32>
+    %1967 = "ttir.relu"(%1965, %1966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1968 = tensor.empty() : tensor<1x64xf32>
+    %1969 = "ttir.relu"(%1967, %1968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1970 = tensor.empty() : tensor<1x64xf32>
+    %1971 = "ttir.relu"(%1969, %1970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1972 = tensor.empty() : tensor<1x64xf32>
+    %1973 = "ttir.relu"(%1971, %1972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1974 = tensor.empty() : tensor<1x64xf32>
+    %1975 = "ttir.relu"(%1973, %1974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1976 = tensor.empty() : tensor<1x64xf32>
+    %1977 = "ttir.relu"(%1975, %1976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1978 = tensor.empty() : tensor<1x64xf32>
+    %1979 = "ttir.relu"(%1977, %1978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1980 = tensor.empty() : tensor<1x64xf32>
+    %1981 = "ttir.relu"(%1979, %1980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1982 = tensor.empty() : tensor<1x64xf32>
+    %1983 = "ttir.relu"(%1981, %1982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1984 = tensor.empty() : tensor<1x64xf32>
+    %1985 = "ttir.relu"(%1983, %1984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1986 = tensor.empty() : tensor<1x64xf32>
+    %1987 = "ttir.relu"(%1985, %1986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1988 = tensor.empty() : tensor<1x64xf32>
+    %1989 = "ttir.relu"(%1987, %1988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1990 = tensor.empty() : tensor<1x64xf32>
+    %1991 = "ttir.relu"(%1989, %1990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1992 = tensor.empty() : tensor<1x64xf32>
+    %1993 = "ttir.relu"(%1991, %1992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1994 = tensor.empty() : tensor<1x64xf32>
+    %1995 = "ttir.relu"(%1993, %1994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1996 = tensor.empty() : tensor<1x64xf32>
+    %1997 = "ttir.relu"(%1995, %1996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %1998 = tensor.empty() : tensor<1x64xf32>
+    %1999 = "ttir.relu"(%1997, %1998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2000 = tensor.empty() : tensor<1x64xf32>
+    %2001 = "ttir.relu"(%1999, %2000) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2002 = tensor.empty() : tensor<1x64xf32>
+    %2003 = "ttir.relu"(%2001, %2002) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2004 = tensor.empty() : tensor<1x64xf32>
+    %2005 = "ttir.relu"(%2003, %2004) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2006 = tensor.empty() : tensor<1x64xf32>
+    %2007 = "ttir.relu"(%2005, %2006) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2008 = tensor.empty() : tensor<1x64xf32>
+    %2009 = "ttir.relu"(%2007, %2008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2010 = tensor.empty() : tensor<1x64xf32>
+    %2011 = "ttir.relu"(%2009, %2010) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2012 = tensor.empty() : tensor<1x64xf32>
+    %2013 = "ttir.relu"(%2011, %2012) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2014 = tensor.empty() : tensor<1x64xf32>
+    %2015 = "ttir.relu"(%2013, %2014) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2016 = tensor.empty() : tensor<1x64xf32>
+    %2017 = "ttir.relu"(%2015, %2016) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2018 = tensor.empty() : tensor<1x64xf32>
+    %2019 = "ttir.relu"(%2017, %2018) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2020 = tensor.empty() : tensor<1x64xf32>
+    %2021 = "ttir.relu"(%2019, %2020) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2022 = tensor.empty() : tensor<1x64xf32>
+    %2023 = "ttir.relu"(%2021, %2022) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2024 = tensor.empty() : tensor<1x64xf32>
+    %2025 = "ttir.relu"(%2023, %2024) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2026 = tensor.empty() : tensor<1x64xf32>
+    %2027 = "ttir.relu"(%2025, %2026) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2028 = tensor.empty() : tensor<1x64xf32>
+    %2029 = "ttir.relu"(%2027, %2028) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2030 = tensor.empty() : tensor<1x64xf32>
+    %2031 = "ttir.relu"(%2029, %2030) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2032 = tensor.empty() : tensor<1x64xf32>
+    %2033 = "ttir.relu"(%2031, %2032) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2034 = tensor.empty() : tensor<1x64xf32>
+    %2035 = "ttir.relu"(%2033, %2034) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2036 = tensor.empty() : tensor<1x64xf32>
+    %2037 = "ttir.relu"(%2035, %2036) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2038 = tensor.empty() : tensor<1x64xf32>
+    %2039 = "ttir.relu"(%2037, %2038) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2040 = tensor.empty() : tensor<1x64xf32>
+    %2041 = "ttir.relu"(%2039, %2040) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2042 = tensor.empty() : tensor<1x64xf32>
+    %2043 = "ttir.relu"(%2041, %2042) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2044 = tensor.empty() : tensor<1x64xf32>
+    %2045 = "ttir.relu"(%2043, %2044) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2046 = tensor.empty() : tensor<1x64xf32>
+    %2047 = "ttir.relu"(%2045, %2046) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2048 = tensor.empty() : tensor<1x64xf32>
+    %2049 = "ttir.relu"(%2047, %2048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2050 = tensor.empty() : tensor<1x64xf32>
+    %2051 = "ttir.relu"(%2049, %2050) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2052 = tensor.empty() : tensor<1x64xf32>
+    %2053 = "ttir.relu"(%2051, %2052) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2054 = tensor.empty() : tensor<1x64xf32>
+    %2055 = "ttir.relu"(%2053, %2054) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2056 = tensor.empty() : tensor<1x64xf32>
+    %2057 = "ttir.relu"(%2055, %2056) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2058 = tensor.empty() : tensor<1x64xf32>
+    %2059 = "ttir.relu"(%2057, %2058) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2060 = tensor.empty() : tensor<1x64xf32>
+    %2061 = "ttir.relu"(%2059, %2060) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2062 = tensor.empty() : tensor<1x64xf32>
+    %2063 = "ttir.relu"(%2061, %2062) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2064 = tensor.empty() : tensor<1x64xf32>
+    %2065 = "ttir.relu"(%2063, %2064) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2066 = tensor.empty() : tensor<1x64xf32>
+    %2067 = "ttir.relu"(%2065, %2066) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2068 = tensor.empty() : tensor<1x64xf32>
+    %2069 = "ttir.relu"(%2067, %2068) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2070 = tensor.empty() : tensor<1x64xf32>
+    %2071 = "ttir.relu"(%2069, %2070) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2072 = tensor.empty() : tensor<1x64xf32>
+    %2073 = "ttir.relu"(%2071, %2072) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2074 = tensor.empty() : tensor<1x64xf32>
+    %2075 = "ttir.relu"(%2073, %2074) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2076 = tensor.empty() : tensor<1x64xf32>
+    %2077 = "ttir.relu"(%2075, %2076) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2078 = tensor.empty() : tensor<1x64xf32>
+    %2079 = "ttir.relu"(%2077, %2078) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2080 = tensor.empty() : tensor<1x64xf32>
+    %2081 = "ttir.relu"(%2079, %2080) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2082 = tensor.empty() : tensor<1x64xf32>
+    %2083 = "ttir.relu"(%2081, %2082) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2084 = tensor.empty() : tensor<1x64xf32>
+    %2085 = "ttir.relu"(%2083, %2084) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2086 = tensor.empty() : tensor<1x64xf32>
+    %2087 = "ttir.relu"(%2085, %2086) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2088 = tensor.empty() : tensor<1x64xf32>
+    %2089 = "ttir.relu"(%2087, %2088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2090 = tensor.empty() : tensor<1x64xf32>
+    %2091 = "ttir.relu"(%2089, %2090) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2092 = tensor.empty() : tensor<1x64xf32>
+    %2093 = "ttir.relu"(%2091, %2092) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2094 = tensor.empty() : tensor<1x64xf32>
+    %2095 = "ttir.relu"(%2093, %2094) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2096 = tensor.empty() : tensor<1x64xf32>
+    %2097 = "ttir.relu"(%2095, %2096) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2098 = tensor.empty() : tensor<1x64xf32>
+    %2099 = "ttir.relu"(%2097, %2098) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2100 = tensor.empty() : tensor<1x64xf32>
+    %2101 = "ttir.relu"(%2099, %2100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2102 = tensor.empty() : tensor<1x64xf32>
+    %2103 = "ttir.relu"(%2101, %2102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2104 = tensor.empty() : tensor<1x64xf32>
+    %2105 = "ttir.relu"(%2103, %2104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2106 = tensor.empty() : tensor<1x64xf32>
+    %2107 = "ttir.relu"(%2105, %2106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2108 = tensor.empty() : tensor<1x64xf32>
+    %2109 = "ttir.relu"(%2107, %2108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2110 = tensor.empty() : tensor<1x64xf32>
+    %2111 = "ttir.relu"(%2109, %2110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2112 = tensor.empty() : tensor<1x64xf32>
+    %2113 = "ttir.relu"(%2111, %2112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2114 = tensor.empty() : tensor<1x64xf32>
+    %2115 = "ttir.relu"(%2113, %2114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2116 = tensor.empty() : tensor<1x64xf32>
+    %2117 = "ttir.relu"(%2115, %2116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2118 = tensor.empty() : tensor<1x64xf32>
+    %2119 = "ttir.relu"(%2117, %2118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2120 = tensor.empty() : tensor<1x64xf32>
+    %2121 = "ttir.relu"(%2119, %2120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2122 = tensor.empty() : tensor<1x64xf32>
+    %2123 = "ttir.relu"(%2121, %2122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2124 = tensor.empty() : tensor<1x64xf32>
+    %2125 = "ttir.relu"(%2123, %2124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2126 = tensor.empty() : tensor<1x64xf32>
+    %2127 = "ttir.relu"(%2125, %2126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2128 = tensor.empty() : tensor<1x64xf32>
+    %2129 = "ttir.relu"(%2127, %2128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2130 = tensor.empty() : tensor<1x64xf32>
+    %2131 = "ttir.relu"(%2129, %2130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2132 = tensor.empty() : tensor<1x64xf32>
+    %2133 = "ttir.relu"(%2131, %2132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2134 = tensor.empty() : tensor<1x64xf32>
+    %2135 = "ttir.relu"(%2133, %2134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2136 = tensor.empty() : tensor<1x64xf32>
+    %2137 = "ttir.relu"(%2135, %2136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2138 = tensor.empty() : tensor<1x64xf32>
+    %2139 = "ttir.relu"(%2137, %2138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2140 = tensor.empty() : tensor<1x64xf32>
+    %2141 = "ttir.relu"(%2139, %2140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2142 = tensor.empty() : tensor<1x64xf32>
+    %2143 = "ttir.relu"(%2141, %2142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2144 = tensor.empty() : tensor<1x64xf32>
+    %2145 = "ttir.relu"(%2143, %2144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2146 = tensor.empty() : tensor<1x64xf32>
+    %2147 = "ttir.relu"(%2145, %2146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2148 = tensor.empty() : tensor<1x64xf32>
+    %2149 = "ttir.relu"(%2147, %2148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2150 = tensor.empty() : tensor<1x64xf32>
+    %2151 = "ttir.relu"(%2149, %2150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2152 = tensor.empty() : tensor<1x64xf32>
+    %2153 = "ttir.relu"(%2151, %2152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2154 = tensor.empty() : tensor<1x64xf32>
+    %2155 = "ttir.relu"(%2153, %2154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2156 = tensor.empty() : tensor<1x64xf32>
+    %2157 = "ttir.relu"(%2155, %2156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2158 = tensor.empty() : tensor<1x64xf32>
+    %2159 = "ttir.relu"(%2157, %2158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2160 = tensor.empty() : tensor<1x64xf32>
+    %2161 = "ttir.relu"(%2159, %2160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2162 = tensor.empty() : tensor<1x64xf32>
+    %2163 = "ttir.relu"(%2161, %2162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2164 = tensor.empty() : tensor<1x64xf32>
+    %2165 = "ttir.relu"(%2163, %2164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2166 = tensor.empty() : tensor<1x64xf32>
+    %2167 = "ttir.relu"(%2165, %2166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2168 = tensor.empty() : tensor<1x64xf32>
+    %2169 = "ttir.relu"(%2167, %2168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2170 = tensor.empty() : tensor<1x64xf32>
+    %2171 = "ttir.relu"(%2169, %2170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2172 = tensor.empty() : tensor<1x64xf32>
+    %2173 = "ttir.relu"(%2171, %2172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2174 = tensor.empty() : tensor<1x64xf32>
+    %2175 = "ttir.relu"(%2173, %2174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2176 = tensor.empty() : tensor<1x64xf32>
+    %2177 = "ttir.relu"(%2175, %2176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2178 = tensor.empty() : tensor<1x64xf32>
+    %2179 = "ttir.relu"(%2177, %2178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2180 = tensor.empty() : tensor<1x64xf32>
+    %2181 = "ttir.relu"(%2179, %2180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2182 = tensor.empty() : tensor<1x64xf32>
+    %2183 = "ttir.relu"(%2181, %2182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2184 = tensor.empty() : tensor<1x64xf32>
+    %2185 = "ttir.relu"(%2183, %2184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2186 = tensor.empty() : tensor<1x64xf32>
+    %2187 = "ttir.relu"(%2185, %2186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2188 = tensor.empty() : tensor<1x64xf32>
+    %2189 = "ttir.relu"(%2187, %2188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2190 = tensor.empty() : tensor<1x64xf32>
+    %2191 = "ttir.relu"(%2189, %2190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2192 = tensor.empty() : tensor<1x64xf32>
+    %2193 = "ttir.relu"(%2191, %2192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2194 = tensor.empty() : tensor<1x64xf32>
+    %2195 = "ttir.relu"(%2193, %2194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2196 = tensor.empty() : tensor<1x64xf32>
+    %2197 = "ttir.relu"(%2195, %2196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2198 = tensor.empty() : tensor<1x64xf32>
+    %2199 = "ttir.relu"(%2197, %2198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2200 = tensor.empty() : tensor<1x64xf32>
+    %2201 = "ttir.relu"(%2199, %2200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2202 = tensor.empty() : tensor<1x64xf32>
+    %2203 = "ttir.relu"(%2201, %2202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2204 = tensor.empty() : tensor<1x64xf32>
+    %2205 = "ttir.relu"(%2203, %2204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2206 = tensor.empty() : tensor<1x64xf32>
+    %2207 = "ttir.relu"(%2205, %2206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2208 = tensor.empty() : tensor<1x64xf32>
+    %2209 = "ttir.relu"(%2207, %2208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2210 = tensor.empty() : tensor<1x64xf32>
+    %2211 = "ttir.relu"(%2209, %2210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2212 = tensor.empty() : tensor<1x64xf32>
+    %2213 = "ttir.relu"(%2211, %2212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2214 = tensor.empty() : tensor<1x64xf32>
+    %2215 = "ttir.relu"(%2213, %2214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2216 = tensor.empty() : tensor<1x64xf32>
+    %2217 = "ttir.relu"(%2215, %2216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2218 = tensor.empty() : tensor<1x64xf32>
+    %2219 = "ttir.relu"(%2217, %2218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2220 = tensor.empty() : tensor<1x64xf32>
+    %2221 = "ttir.relu"(%2219, %2220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2222 = tensor.empty() : tensor<1x64xf32>
+    %2223 = "ttir.relu"(%2221, %2222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2224 = tensor.empty() : tensor<1x64xf32>
+    %2225 = "ttir.relu"(%2223, %2224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2226 = tensor.empty() : tensor<1x64xf32>
+    %2227 = "ttir.relu"(%2225, %2226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2228 = tensor.empty() : tensor<1x64xf32>
+    %2229 = "ttir.relu"(%2227, %2228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2230 = tensor.empty() : tensor<1x64xf32>
+    %2231 = "ttir.relu"(%2229, %2230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2232 = tensor.empty() : tensor<1x64xf32>
+    %2233 = "ttir.relu"(%2231, %2232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2234 = tensor.empty() : tensor<1x64xf32>
+    %2235 = "ttir.relu"(%2233, %2234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2236 = tensor.empty() : tensor<1x64xf32>
+    %2237 = "ttir.relu"(%2235, %2236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2238 = tensor.empty() : tensor<1x64xf32>
+    %2239 = "ttir.relu"(%2237, %2238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2240 = tensor.empty() : tensor<1x64xf32>
+    %2241 = "ttir.relu"(%2239, %2240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2242 = tensor.empty() : tensor<1x64xf32>
+    %2243 = "ttir.relu"(%2241, %2242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2244 = tensor.empty() : tensor<1x64xf32>
+    %2245 = "ttir.relu"(%2243, %2244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2246 = tensor.empty() : tensor<1x64xf32>
+    %2247 = "ttir.relu"(%2245, %2246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2248 = tensor.empty() : tensor<1x64xf32>
+    %2249 = "ttir.relu"(%2247, %2248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2250 = tensor.empty() : tensor<1x64xf32>
+    %2251 = "ttir.relu"(%2249, %2250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2252 = tensor.empty() : tensor<1x64xf32>
+    %2253 = "ttir.relu"(%2251, %2252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2254 = tensor.empty() : tensor<1x64xf32>
+    %2255 = "ttir.relu"(%2253, %2254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2256 = tensor.empty() : tensor<1x64xf32>
+    %2257 = "ttir.relu"(%2255, %2256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2258 = tensor.empty() : tensor<1x64xf32>
+    %2259 = "ttir.relu"(%2257, %2258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2260 = tensor.empty() : tensor<1x64xf32>
+    %2261 = "ttir.relu"(%2259, %2260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2262 = tensor.empty() : tensor<1x64xf32>
+    %2263 = "ttir.relu"(%2261, %2262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2264 = tensor.empty() : tensor<1x64xf32>
+    %2265 = "ttir.relu"(%2263, %2264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2266 = tensor.empty() : tensor<1x64xf32>
+    %2267 = "ttir.relu"(%2265, %2266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2268 = tensor.empty() : tensor<1x64xf32>
+    %2269 = "ttir.relu"(%2267, %2268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2270 = tensor.empty() : tensor<1x64xf32>
+    %2271 = "ttir.relu"(%2269, %2270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2272 = tensor.empty() : tensor<1x64xf32>
+    %2273 = "ttir.relu"(%2271, %2272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2274 = tensor.empty() : tensor<1x64xf32>
+    %2275 = "ttir.relu"(%2273, %2274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2276 = tensor.empty() : tensor<1x64xf32>
+    %2277 = "ttir.relu"(%2275, %2276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2278 = tensor.empty() : tensor<1x64xf32>
+    %2279 = "ttir.relu"(%2277, %2278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2280 = tensor.empty() : tensor<1x64xf32>
+    %2281 = "ttir.relu"(%2279, %2280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2282 = tensor.empty() : tensor<1x64xf32>
+    %2283 = "ttir.relu"(%2281, %2282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2284 = tensor.empty() : tensor<1x64xf32>
+    %2285 = "ttir.relu"(%2283, %2284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2286 = tensor.empty() : tensor<1x64xf32>
+    %2287 = "ttir.relu"(%2285, %2286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2288 = tensor.empty() : tensor<1x64xf32>
+    %2289 = "ttir.relu"(%2287, %2288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2290 = tensor.empty() : tensor<1x64xf32>
+    %2291 = "ttir.relu"(%2289, %2290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2292 = tensor.empty() : tensor<1x64xf32>
+    %2293 = "ttir.relu"(%2291, %2292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2294 = tensor.empty() : tensor<1x64xf32>
+    %2295 = "ttir.relu"(%2293, %2294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2296 = tensor.empty() : tensor<1x64xf32>
+    %2297 = "ttir.relu"(%2295, %2296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2298 = tensor.empty() : tensor<1x64xf32>
+    %2299 = "ttir.relu"(%2297, %2298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2300 = tensor.empty() : tensor<1x64xf32>
+    %2301 = "ttir.relu"(%2299, %2300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2302 = tensor.empty() : tensor<1x64xf32>
+    %2303 = "ttir.relu"(%2301, %2302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2304 = tensor.empty() : tensor<1x64xf32>
+    %2305 = "ttir.relu"(%2303, %2304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2306 = tensor.empty() : tensor<1x64xf32>
+    %2307 = "ttir.relu"(%2305, %2306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2308 = tensor.empty() : tensor<1x64xf32>
+    %2309 = "ttir.relu"(%2307, %2308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2310 = tensor.empty() : tensor<1x64xf32>
+    %2311 = "ttir.relu"(%2309, %2310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2312 = tensor.empty() : tensor<1x64xf32>
+    %2313 = "ttir.relu"(%2311, %2312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2314 = tensor.empty() : tensor<1x64xf32>
+    %2315 = "ttir.relu"(%2313, %2314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2316 = tensor.empty() : tensor<1x64xf32>
+    %2317 = "ttir.relu"(%2315, %2316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2318 = tensor.empty() : tensor<1x64xf32>
+    %2319 = "ttir.relu"(%2317, %2318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2320 = tensor.empty() : tensor<1x64xf32>
+    %2321 = "ttir.relu"(%2319, %2320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2322 = tensor.empty() : tensor<1x64xf32>
+    %2323 = "ttir.relu"(%2321, %2322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2324 = tensor.empty() : tensor<1x64xf32>
+    %2325 = "ttir.relu"(%2323, %2324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2326 = tensor.empty() : tensor<1x64xf32>
+    %2327 = "ttir.relu"(%2325, %2326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2328 = tensor.empty() : tensor<1x64xf32>
+    %2329 = "ttir.relu"(%2327, %2328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2330 = tensor.empty() : tensor<1x64xf32>
+    %2331 = "ttir.relu"(%2329, %2330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2332 = tensor.empty() : tensor<1x64xf32>
+    %2333 = "ttir.relu"(%2331, %2332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2334 = tensor.empty() : tensor<1x64xf32>
+    %2335 = "ttir.relu"(%2333, %2334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2336 = tensor.empty() : tensor<1x64xf32>
+    %2337 = "ttir.relu"(%2335, %2336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2338 = tensor.empty() : tensor<1x64xf32>
+    %2339 = "ttir.relu"(%2337, %2338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2340 = tensor.empty() : tensor<1x64xf32>
+    %2341 = "ttir.relu"(%2339, %2340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2342 = tensor.empty() : tensor<1x64xf32>
+    %2343 = "ttir.relu"(%2341, %2342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2344 = tensor.empty() : tensor<1x64xf32>
+    %2345 = "ttir.relu"(%2343, %2344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2346 = tensor.empty() : tensor<1x64xf32>
+    %2347 = "ttir.relu"(%2345, %2346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2348 = tensor.empty() : tensor<1x64xf32>
+    %2349 = "ttir.relu"(%2347, %2348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2350 = tensor.empty() : tensor<1x64xf32>
+    %2351 = "ttir.relu"(%2349, %2350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2352 = tensor.empty() : tensor<1x64xf32>
+    %2353 = "ttir.relu"(%2351, %2352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2354 = tensor.empty() : tensor<1x64xf32>
+    %2355 = "ttir.relu"(%2353, %2354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2356 = tensor.empty() : tensor<1x64xf32>
+    %2357 = "ttir.relu"(%2355, %2356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2358 = tensor.empty() : tensor<1x64xf32>
+    %2359 = "ttir.relu"(%2357, %2358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2360 = tensor.empty() : tensor<1x64xf32>
+    %2361 = "ttir.relu"(%2359, %2360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2362 = tensor.empty() : tensor<1x64xf32>
+    %2363 = "ttir.relu"(%2361, %2362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2364 = tensor.empty() : tensor<1x64xf32>
+    %2365 = "ttir.relu"(%2363, %2364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2366 = tensor.empty() : tensor<1x64xf32>
+    %2367 = "ttir.relu"(%2365, %2366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2368 = tensor.empty() : tensor<1x64xf32>
+    %2369 = "ttir.relu"(%2367, %2368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2370 = tensor.empty() : tensor<1x64xf32>
+    %2371 = "ttir.relu"(%2369, %2370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2372 = tensor.empty() : tensor<1x64xf32>
+    %2373 = "ttir.relu"(%2371, %2372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2374 = tensor.empty() : tensor<1x64xf32>
+    %2375 = "ttir.relu"(%2373, %2374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2376 = tensor.empty() : tensor<1x64xf32>
+    %2377 = "ttir.relu"(%2375, %2376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2378 = tensor.empty() : tensor<1x64xf32>
+    %2379 = "ttir.relu"(%2377, %2378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2380 = tensor.empty() : tensor<1x64xf32>
+    %2381 = "ttir.relu"(%2379, %2380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2382 = tensor.empty() : tensor<1x64xf32>
+    %2383 = "ttir.relu"(%2381, %2382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2384 = tensor.empty() : tensor<1x64xf32>
+    %2385 = "ttir.relu"(%2383, %2384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2386 = tensor.empty() : tensor<1x64xf32>
+    %2387 = "ttir.relu"(%2385, %2386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2388 = tensor.empty() : tensor<1x64xf32>
+    %2389 = "ttir.relu"(%2387, %2388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2390 = tensor.empty() : tensor<1x64xf32>
+    %2391 = "ttir.relu"(%2389, %2390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2392 = tensor.empty() : tensor<1x64xf32>
+    %2393 = "ttir.relu"(%2391, %2392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2394 = tensor.empty() : tensor<1x64xf32>
+    %2395 = "ttir.relu"(%2393, %2394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2396 = tensor.empty() : tensor<1x64xf32>
+    %2397 = "ttir.relu"(%2395, %2396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2398 = tensor.empty() : tensor<1x64xf32>
+    %2399 = "ttir.relu"(%2397, %2398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2400 = tensor.empty() : tensor<1x64xf32>
+    %2401 = "ttir.relu"(%2399, %2400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2402 = tensor.empty() : tensor<1x64xf32>
+    %2403 = "ttir.relu"(%2401, %2402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2404 = tensor.empty() : tensor<1x64xf32>
+    %2405 = "ttir.relu"(%2403, %2404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2406 = tensor.empty() : tensor<1x64xf32>
+    %2407 = "ttir.relu"(%2405, %2406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2408 = tensor.empty() : tensor<1x64xf32>
+    %2409 = "ttir.relu"(%2407, %2408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2410 = tensor.empty() : tensor<1x64xf32>
+    %2411 = "ttir.relu"(%2409, %2410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2412 = tensor.empty() : tensor<1x64xf32>
+    %2413 = "ttir.relu"(%2411, %2412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2414 = tensor.empty() : tensor<1x64xf32>
+    %2415 = "ttir.relu"(%2413, %2414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2416 = tensor.empty() : tensor<1x64xf32>
+    %2417 = "ttir.relu"(%2415, %2416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2418 = tensor.empty() : tensor<1x64xf32>
+    %2419 = "ttir.relu"(%2417, %2418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2420 = tensor.empty() : tensor<1x64xf32>
+    %2421 = "ttir.relu"(%2419, %2420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2422 = tensor.empty() : tensor<1x64xf32>
+    %2423 = "ttir.relu"(%2421, %2422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2424 = tensor.empty() : tensor<1x64xf32>
+    %2425 = "ttir.relu"(%2423, %2424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2426 = tensor.empty() : tensor<1x64xf32>
+    %2427 = "ttir.relu"(%2425, %2426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2428 = tensor.empty() : tensor<1x64xf32>
+    %2429 = "ttir.relu"(%2427, %2428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2430 = tensor.empty() : tensor<1x64xf32>
+    %2431 = "ttir.relu"(%2429, %2430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2432 = tensor.empty() : tensor<1x64xf32>
+    %2433 = "ttir.relu"(%2431, %2432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2434 = tensor.empty() : tensor<1x64xf32>
+    %2435 = "ttir.relu"(%2433, %2434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2436 = tensor.empty() : tensor<1x64xf32>
+    %2437 = "ttir.relu"(%2435, %2436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2438 = tensor.empty() : tensor<1x64xf32>
+    %2439 = "ttir.relu"(%2437, %2438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2440 = tensor.empty() : tensor<1x64xf32>
+    %2441 = "ttir.relu"(%2439, %2440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2442 = tensor.empty() : tensor<1x64xf32>
+    %2443 = "ttir.relu"(%2441, %2442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2444 = tensor.empty() : tensor<1x64xf32>
+    %2445 = "ttir.relu"(%2443, %2444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2446 = tensor.empty() : tensor<1x64xf32>
+    %2447 = "ttir.relu"(%2445, %2446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2448 = tensor.empty() : tensor<1x64xf32>
+    %2449 = "ttir.relu"(%2447, %2448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2450 = tensor.empty() : tensor<1x64xf32>
+    %2451 = "ttir.relu"(%2449, %2450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2452 = tensor.empty() : tensor<1x64xf32>
+    %2453 = "ttir.relu"(%2451, %2452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2454 = tensor.empty() : tensor<1x64xf32>
+    %2455 = "ttir.relu"(%2453, %2454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2456 = tensor.empty() : tensor<1x64xf32>
+    %2457 = "ttir.relu"(%2455, %2456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2458 = tensor.empty() : tensor<1x64xf32>
+    %2459 = "ttir.relu"(%2457, %2458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2460 = tensor.empty() : tensor<1x64xf32>
+    %2461 = "ttir.relu"(%2459, %2460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2462 = tensor.empty() : tensor<1x64xf32>
+    %2463 = "ttir.relu"(%2461, %2462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2464 = tensor.empty() : tensor<1x64xf32>
+    %2465 = "ttir.relu"(%2463, %2464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2466 = tensor.empty() : tensor<1x64xf32>
+    %2467 = "ttir.relu"(%2465, %2466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2468 = tensor.empty() : tensor<1x64xf32>
+    %2469 = "ttir.relu"(%2467, %2468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2470 = tensor.empty() : tensor<1x64xf32>
+    %2471 = "ttir.relu"(%2469, %2470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2472 = tensor.empty() : tensor<1x64xf32>
+    %2473 = "ttir.relu"(%2471, %2472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2474 = tensor.empty() : tensor<1x64xf32>
+    %2475 = "ttir.relu"(%2473, %2474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2476 = tensor.empty() : tensor<1x64xf32>
+    %2477 = "ttir.relu"(%2475, %2476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2478 = tensor.empty() : tensor<1x64xf32>
+    %2479 = "ttir.relu"(%2477, %2478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2480 = tensor.empty() : tensor<1x64xf32>
+    %2481 = "ttir.relu"(%2479, %2480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2482 = tensor.empty() : tensor<1x64xf32>
+    %2483 = "ttir.relu"(%2481, %2482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2484 = tensor.empty() : tensor<1x64xf32>
+    %2485 = "ttir.relu"(%2483, %2484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2486 = tensor.empty() : tensor<1x64xf32>
+    %2487 = "ttir.relu"(%2485, %2486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2488 = tensor.empty() : tensor<1x64xf32>
+    %2489 = "ttir.relu"(%2487, %2488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2490 = tensor.empty() : tensor<1x64xf32>
+    %2491 = "ttir.relu"(%2489, %2490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2492 = tensor.empty() : tensor<1x64xf32>
+    %2493 = "ttir.relu"(%2491, %2492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2494 = tensor.empty() : tensor<1x64xf32>
+    %2495 = "ttir.relu"(%2493, %2494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2496 = tensor.empty() : tensor<1x64xf32>
+    %2497 = "ttir.relu"(%2495, %2496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2498 = tensor.empty() : tensor<1x64xf32>
+    %2499 = "ttir.relu"(%2497, %2498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2500 = tensor.empty() : tensor<1x64xf32>
+    %2501 = "ttir.relu"(%2499, %2500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2502 = tensor.empty() : tensor<1x64xf32>
+    %2503 = "ttir.relu"(%2501, %2502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2504 = tensor.empty() : tensor<1x64xf32>
+    %2505 = "ttir.relu"(%2503, %2504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2506 = tensor.empty() : tensor<1x64xf32>
+    %2507 = "ttir.relu"(%2505, %2506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2508 = tensor.empty() : tensor<1x64xf32>
+    %2509 = "ttir.relu"(%2507, %2508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2510 = tensor.empty() : tensor<1x64xf32>
+    %2511 = "ttir.relu"(%2509, %2510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2512 = tensor.empty() : tensor<1x64xf32>
+    %2513 = "ttir.relu"(%2511, %2512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2514 = tensor.empty() : tensor<1x64xf32>
+    %2515 = "ttir.relu"(%2513, %2514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2516 = tensor.empty() : tensor<1x64xf32>
+    %2517 = "ttir.relu"(%2515, %2516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2518 = tensor.empty() : tensor<1x64xf32>
+    %2519 = "ttir.relu"(%2517, %2518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2520 = tensor.empty() : tensor<1x64xf32>
+    %2521 = "ttir.relu"(%2519, %2520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2522 = tensor.empty() : tensor<1x64xf32>
+    %2523 = "ttir.relu"(%2521, %2522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2524 = tensor.empty() : tensor<1x64xf32>
+    %2525 = "ttir.relu"(%2523, %2524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2526 = tensor.empty() : tensor<1x64xf32>
+    %2527 = "ttir.relu"(%2525, %2526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2528 = tensor.empty() : tensor<1x64xf32>
+    %2529 = "ttir.relu"(%2527, %2528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2530 = tensor.empty() : tensor<1x64xf32>
+    %2531 = "ttir.relu"(%2529, %2530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2532 = tensor.empty() : tensor<1x64xf32>
+    %2533 = "ttir.relu"(%2531, %2532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2534 = tensor.empty() : tensor<1x64xf32>
+    %2535 = "ttir.relu"(%2533, %2534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2536 = tensor.empty() : tensor<1x64xf32>
+    %2537 = "ttir.relu"(%2535, %2536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2538 = tensor.empty() : tensor<1x64xf32>
+    %2539 = "ttir.relu"(%2537, %2538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2540 = tensor.empty() : tensor<1x64xf32>
+    %2541 = "ttir.relu"(%2539, %2540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2542 = tensor.empty() : tensor<1x64xf32>
+    %2543 = "ttir.relu"(%2541, %2542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2544 = tensor.empty() : tensor<1x64xf32>
+    %2545 = "ttir.relu"(%2543, %2544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2546 = tensor.empty() : tensor<1x64xf32>
+    %2547 = "ttir.relu"(%2545, %2546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2548 = tensor.empty() : tensor<1x64xf32>
+    %2549 = "ttir.relu"(%2547, %2548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2550 = tensor.empty() : tensor<1x64xf32>
+    %2551 = "ttir.relu"(%2549, %2550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2552 = tensor.empty() : tensor<1x64xf32>
+    %2553 = "ttir.relu"(%2551, %2552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2554 = tensor.empty() : tensor<1x64xf32>
+    %2555 = "ttir.relu"(%2553, %2554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2556 = tensor.empty() : tensor<1x64xf32>
+    %2557 = "ttir.relu"(%2555, %2556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2558 = tensor.empty() : tensor<1x64xf32>
+    %2559 = "ttir.relu"(%2557, %2558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2560 = tensor.empty() : tensor<1x64xf32>
+    %2561 = "ttir.relu"(%2559, %2560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2562 = tensor.empty() : tensor<1x64xf32>
+    %2563 = "ttir.relu"(%2561, %2562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2564 = tensor.empty() : tensor<1x64xf32>
+    %2565 = "ttir.relu"(%2563, %2564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2566 = tensor.empty() : tensor<1x64xf32>
+    %2567 = "ttir.relu"(%2565, %2566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2568 = tensor.empty() : tensor<1x64xf32>
+    %2569 = "ttir.relu"(%2567, %2568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2570 = tensor.empty() : tensor<1x64xf32>
+    %2571 = "ttir.relu"(%2569, %2570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2572 = tensor.empty() : tensor<1x64xf32>
+    %2573 = "ttir.relu"(%2571, %2572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2574 = tensor.empty() : tensor<1x64xf32>
+    %2575 = "ttir.relu"(%2573, %2574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2576 = tensor.empty() : tensor<1x64xf32>
+    %2577 = "ttir.relu"(%2575, %2576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2578 = tensor.empty() : tensor<1x64xf32>
+    %2579 = "ttir.relu"(%2577, %2578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2580 = tensor.empty() : tensor<1x64xf32>
+    %2581 = "ttir.relu"(%2579, %2580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2582 = tensor.empty() : tensor<1x64xf32>
+    %2583 = "ttir.relu"(%2581, %2582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2584 = tensor.empty() : tensor<1x64xf32>
+    %2585 = "ttir.relu"(%2583, %2584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2586 = tensor.empty() : tensor<1x64xf32>
+    %2587 = "ttir.relu"(%2585, %2586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2588 = tensor.empty() : tensor<1x64xf32>
+    %2589 = "ttir.relu"(%2587, %2588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2590 = tensor.empty() : tensor<1x64xf32>
+    %2591 = "ttir.relu"(%2589, %2590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2592 = tensor.empty() : tensor<1x64xf32>
+    %2593 = "ttir.relu"(%2591, %2592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2594 = tensor.empty() : tensor<1x64xf32>
+    %2595 = "ttir.relu"(%2593, %2594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2596 = tensor.empty() : tensor<1x64xf32>
+    %2597 = "ttir.relu"(%2595, %2596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2598 = tensor.empty() : tensor<1x64xf32>
+    %2599 = "ttir.relu"(%2597, %2598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2600 = tensor.empty() : tensor<1x64xf32>
+    %2601 = "ttir.relu"(%2599, %2600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2602 = tensor.empty() : tensor<1x64xf32>
+    %2603 = "ttir.relu"(%2601, %2602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2604 = tensor.empty() : tensor<1x64xf32>
+    %2605 = "ttir.relu"(%2603, %2604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2606 = tensor.empty() : tensor<1x64xf32>
+    %2607 = "ttir.relu"(%2605, %2606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2608 = tensor.empty() : tensor<1x64xf32>
+    %2609 = "ttir.relu"(%2607, %2608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2610 = tensor.empty() : tensor<1x64xf32>
+    %2611 = "ttir.relu"(%2609, %2610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2612 = tensor.empty() : tensor<1x64xf32>
+    %2613 = "ttir.relu"(%2611, %2612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2614 = tensor.empty() : tensor<1x64xf32>
+    %2615 = "ttir.relu"(%2613, %2614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2616 = tensor.empty() : tensor<1x64xf32>
+    %2617 = "ttir.relu"(%2615, %2616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2618 = tensor.empty() : tensor<1x64xf32>
+    %2619 = "ttir.relu"(%2617, %2618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2620 = tensor.empty() : tensor<1x64xf32>
+    %2621 = "ttir.relu"(%2619, %2620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2622 = tensor.empty() : tensor<1x64xf32>
+    %2623 = "ttir.relu"(%2621, %2622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2624 = tensor.empty() : tensor<1x64xf32>
+    %2625 = "ttir.relu"(%2623, %2624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2626 = tensor.empty() : tensor<1x64xf32>
+    %2627 = "ttir.relu"(%2625, %2626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2628 = tensor.empty() : tensor<1x64xf32>
+    %2629 = "ttir.relu"(%2627, %2628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2630 = tensor.empty() : tensor<1x64xf32>
+    %2631 = "ttir.relu"(%2629, %2630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2632 = tensor.empty() : tensor<1x64xf32>
+    %2633 = "ttir.relu"(%2631, %2632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2634 = tensor.empty() : tensor<1x64xf32>
+    %2635 = "ttir.relu"(%2633, %2634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2636 = tensor.empty() : tensor<1x64xf32>
+    %2637 = "ttir.relu"(%2635, %2636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2638 = tensor.empty() : tensor<1x64xf32>
+    %2639 = "ttir.relu"(%2637, %2638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2640 = tensor.empty() : tensor<1x64xf32>
+    %2641 = "ttir.relu"(%2639, %2640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2642 = tensor.empty() : tensor<1x64xf32>
+    %2643 = "ttir.relu"(%2641, %2642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2644 = tensor.empty() : tensor<1x64xf32>
+    %2645 = "ttir.relu"(%2643, %2644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2646 = tensor.empty() : tensor<1x64xf32>
+    %2647 = "ttir.relu"(%2645, %2646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2648 = tensor.empty() : tensor<1x64xf32>
+    %2649 = "ttir.relu"(%2647, %2648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2650 = tensor.empty() : tensor<1x64xf32>
+    %2651 = "ttir.relu"(%2649, %2650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2652 = tensor.empty() : tensor<1x64xf32>
+    %2653 = "ttir.relu"(%2651, %2652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2654 = tensor.empty() : tensor<1x64xf32>
+    %2655 = "ttir.relu"(%2653, %2654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2656 = tensor.empty() : tensor<1x64xf32>
+    %2657 = "ttir.relu"(%2655, %2656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2658 = tensor.empty() : tensor<1x64xf32>
+    %2659 = "ttir.relu"(%2657, %2658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2660 = tensor.empty() : tensor<1x64xf32>
+    %2661 = "ttir.relu"(%2659, %2660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2662 = tensor.empty() : tensor<1x64xf32>
+    %2663 = "ttir.relu"(%2661, %2662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2664 = tensor.empty() : tensor<1x64xf32>
+    %2665 = "ttir.relu"(%2663, %2664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2666 = tensor.empty() : tensor<1x64xf32>
+    %2667 = "ttir.relu"(%2665, %2666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2668 = tensor.empty() : tensor<1x64xf32>
+    %2669 = "ttir.relu"(%2667, %2668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2670 = tensor.empty() : tensor<1x64xf32>
+    %2671 = "ttir.relu"(%2669, %2670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2672 = tensor.empty() : tensor<1x64xf32>
+    %2673 = "ttir.relu"(%2671, %2672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2674 = tensor.empty() : tensor<1x64xf32>
+    %2675 = "ttir.relu"(%2673, %2674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2676 = tensor.empty() : tensor<1x64xf32>
+    %2677 = "ttir.relu"(%2675, %2676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2678 = tensor.empty() : tensor<1x64xf32>
+    %2679 = "ttir.relu"(%2677, %2678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2680 = tensor.empty() : tensor<1x64xf32>
+    %2681 = "ttir.relu"(%2679, %2680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2682 = tensor.empty() : tensor<1x64xf32>
+    %2683 = "ttir.relu"(%2681, %2682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2684 = tensor.empty() : tensor<1x64xf32>
+    %2685 = "ttir.relu"(%2683, %2684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2686 = tensor.empty() : tensor<1x64xf32>
+    %2687 = "ttir.relu"(%2685, %2686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2688 = tensor.empty() : tensor<1x64xf32>
+    %2689 = "ttir.relu"(%2687, %2688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2690 = tensor.empty() : tensor<1x64xf32>
+    %2691 = "ttir.relu"(%2689, %2690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2692 = tensor.empty() : tensor<1x64xf32>
+    %2693 = "ttir.relu"(%2691, %2692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2694 = tensor.empty() : tensor<1x64xf32>
+    %2695 = "ttir.relu"(%2693, %2694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2696 = tensor.empty() : tensor<1x64xf32>
+    %2697 = "ttir.relu"(%2695, %2696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2698 = tensor.empty() : tensor<1x64xf32>
+    %2699 = "ttir.relu"(%2697, %2698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2700 = tensor.empty() : tensor<1x64xf32>
+    %2701 = "ttir.relu"(%2699, %2700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2702 = tensor.empty() : tensor<1x64xf32>
+    %2703 = "ttir.relu"(%2701, %2702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2704 = tensor.empty() : tensor<1x64xf32>
+    %2705 = "ttir.relu"(%2703, %2704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2706 = tensor.empty() : tensor<1x64xf32>
+    %2707 = "ttir.relu"(%2705, %2706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2708 = tensor.empty() : tensor<1x64xf32>
+    %2709 = "ttir.relu"(%2707, %2708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2710 = tensor.empty() : tensor<1x64xf32>
+    %2711 = "ttir.relu"(%2709, %2710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2712 = tensor.empty() : tensor<1x64xf32>
+    %2713 = "ttir.relu"(%2711, %2712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2714 = tensor.empty() : tensor<1x64xf32>
+    %2715 = "ttir.relu"(%2713, %2714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2716 = tensor.empty() : tensor<1x64xf32>
+    %2717 = "ttir.relu"(%2715, %2716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2718 = tensor.empty() : tensor<1x64xf32>
+    %2719 = "ttir.relu"(%2717, %2718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2720 = tensor.empty() : tensor<1x64xf32>
+    %2721 = "ttir.relu"(%2719, %2720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2722 = tensor.empty() : tensor<1x64xf32>
+    %2723 = "ttir.relu"(%2721, %2722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2724 = tensor.empty() : tensor<1x64xf32>
+    %2725 = "ttir.relu"(%2723, %2724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2726 = tensor.empty() : tensor<1x64xf32>
+    %2727 = "ttir.relu"(%2725, %2726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2728 = tensor.empty() : tensor<1x64xf32>
+    %2729 = "ttir.relu"(%2727, %2728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2730 = tensor.empty() : tensor<1x64xf32>
+    %2731 = "ttir.relu"(%2729, %2730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2732 = tensor.empty() : tensor<1x64xf32>
+    %2733 = "ttir.relu"(%2731, %2732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2734 = tensor.empty() : tensor<1x64xf32>
+    %2735 = "ttir.relu"(%2733, %2734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2736 = tensor.empty() : tensor<1x64xf32>
+    %2737 = "ttir.relu"(%2735, %2736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2738 = tensor.empty() : tensor<1x64xf32>
+    %2739 = "ttir.relu"(%2737, %2738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2740 = tensor.empty() : tensor<1x64xf32>
+    %2741 = "ttir.relu"(%2739, %2740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2742 = tensor.empty() : tensor<1x64xf32>
+    %2743 = "ttir.relu"(%2741, %2742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2744 = tensor.empty() : tensor<1x64xf32>
+    %2745 = "ttir.relu"(%2743, %2744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2746 = tensor.empty() : tensor<1x64xf32>
+    %2747 = "ttir.relu"(%2745, %2746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2748 = tensor.empty() : tensor<1x64xf32>
+    %2749 = "ttir.relu"(%2747, %2748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2750 = tensor.empty() : tensor<1x64xf32>
+    %2751 = "ttir.relu"(%2749, %2750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2752 = tensor.empty() : tensor<1x64xf32>
+    %2753 = "ttir.relu"(%2751, %2752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2754 = tensor.empty() : tensor<1x64xf32>
+    %2755 = "ttir.relu"(%2753, %2754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2756 = tensor.empty() : tensor<1x64xf32>
+    %2757 = "ttir.relu"(%2755, %2756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2758 = tensor.empty() : tensor<1x64xf32>
+    %2759 = "ttir.relu"(%2757, %2758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2760 = tensor.empty() : tensor<1x64xf32>
+    %2761 = "ttir.relu"(%2759, %2760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2762 = tensor.empty() : tensor<1x64xf32>
+    %2763 = "ttir.relu"(%2761, %2762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2764 = tensor.empty() : tensor<1x64xf32>
+    %2765 = "ttir.relu"(%2763, %2764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2766 = tensor.empty() : tensor<1x64xf32>
+    %2767 = "ttir.relu"(%2765, %2766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2768 = tensor.empty() : tensor<1x64xf32>
+    %2769 = "ttir.relu"(%2767, %2768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2770 = tensor.empty() : tensor<1x64xf32>
+    %2771 = "ttir.relu"(%2769, %2770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2772 = tensor.empty() : tensor<1x64xf32>
+    %2773 = "ttir.relu"(%2771, %2772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2774 = tensor.empty() : tensor<1x64xf32>
+    %2775 = "ttir.relu"(%2773, %2774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2776 = tensor.empty() : tensor<1x64xf32>
+    %2777 = "ttir.relu"(%2775, %2776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2778 = tensor.empty() : tensor<1x64xf32>
+    %2779 = "ttir.relu"(%2777, %2778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2780 = tensor.empty() : tensor<1x64xf32>
+    %2781 = "ttir.relu"(%2779, %2780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2782 = tensor.empty() : tensor<1x64xf32>
+    %2783 = "ttir.relu"(%2781, %2782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2784 = tensor.empty() : tensor<1x64xf32>
+    %2785 = "ttir.relu"(%2783, %2784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2786 = tensor.empty() : tensor<1x64xf32>
+    %2787 = "ttir.relu"(%2785, %2786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2788 = tensor.empty() : tensor<1x64xf32>
+    %2789 = "ttir.relu"(%2787, %2788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2790 = tensor.empty() : tensor<1x64xf32>
+    %2791 = "ttir.relu"(%2789, %2790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2792 = tensor.empty() : tensor<1x64xf32>
+    %2793 = "ttir.relu"(%2791, %2792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2794 = tensor.empty() : tensor<1x64xf32>
+    %2795 = "ttir.relu"(%2793, %2794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2796 = tensor.empty() : tensor<1x64xf32>
+    %2797 = "ttir.relu"(%2795, %2796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2798 = tensor.empty() : tensor<1x64xf32>
+    %2799 = "ttir.relu"(%2797, %2798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2800 = tensor.empty() : tensor<1x64xf32>
+    %2801 = "ttir.relu"(%2799, %2800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2802 = tensor.empty() : tensor<1x64xf32>
+    %2803 = "ttir.relu"(%2801, %2802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2804 = tensor.empty() : tensor<1x64xf32>
+    %2805 = "ttir.relu"(%2803, %2804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2806 = tensor.empty() : tensor<1x64xf32>
+    %2807 = "ttir.relu"(%2805, %2806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2808 = tensor.empty() : tensor<1x64xf32>
+    %2809 = "ttir.relu"(%2807, %2808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2810 = tensor.empty() : tensor<1x64xf32>
+    %2811 = "ttir.relu"(%2809, %2810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2812 = tensor.empty() : tensor<1x64xf32>
+    %2813 = "ttir.relu"(%2811, %2812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2814 = tensor.empty() : tensor<1x64xf32>
+    %2815 = "ttir.relu"(%2813, %2814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2816 = tensor.empty() : tensor<1x64xf32>
+    %2817 = "ttir.relu"(%2815, %2816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2818 = tensor.empty() : tensor<1x64xf32>
+    %2819 = "ttir.relu"(%2817, %2818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2820 = tensor.empty() : tensor<1x64xf32>
+    %2821 = "ttir.relu"(%2819, %2820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2822 = tensor.empty() : tensor<1x64xf32>
+    %2823 = "ttir.relu"(%2821, %2822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2824 = tensor.empty() : tensor<1x64xf32>
+    %2825 = "ttir.relu"(%2823, %2824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2826 = tensor.empty() : tensor<1x64xf32>
+    %2827 = "ttir.relu"(%2825, %2826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2828 = tensor.empty() : tensor<1x64xf32>
+    %2829 = "ttir.relu"(%2827, %2828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2830 = tensor.empty() : tensor<1x64xf32>
+    %2831 = "ttir.relu"(%2829, %2830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2832 = tensor.empty() : tensor<1x64xf32>
+    %2833 = "ttir.relu"(%2831, %2832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2834 = tensor.empty() : tensor<1x64xf32>
+    %2835 = "ttir.relu"(%2833, %2834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2836 = tensor.empty() : tensor<1x64xf32>
+    %2837 = "ttir.relu"(%2835, %2836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2838 = tensor.empty() : tensor<1x64xf32>
+    %2839 = "ttir.relu"(%2837, %2838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2840 = tensor.empty() : tensor<1x64xf32>
+    %2841 = "ttir.relu"(%2839, %2840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2842 = tensor.empty() : tensor<1x64xf32>
+    %2843 = "ttir.relu"(%2841, %2842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2844 = tensor.empty() : tensor<1x64xf32>
+    %2845 = "ttir.relu"(%2843, %2844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2846 = tensor.empty() : tensor<1x64xf32>
+    %2847 = "ttir.relu"(%2845, %2846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2848 = tensor.empty() : tensor<1x64xf32>
+    %2849 = "ttir.relu"(%2847, %2848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2850 = tensor.empty() : tensor<1x64xf32>
+    %2851 = "ttir.relu"(%2849, %2850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2852 = tensor.empty() : tensor<1x64xf32>
+    %2853 = "ttir.relu"(%2851, %2852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2854 = tensor.empty() : tensor<1x64xf32>
+    %2855 = "ttir.relu"(%2853, %2854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2856 = tensor.empty() : tensor<1x64xf32>
+    %2857 = "ttir.relu"(%2855, %2856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2858 = tensor.empty() : tensor<1x64xf32>
+    %2859 = "ttir.relu"(%2857, %2858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2860 = tensor.empty() : tensor<1x64xf32>
+    %2861 = "ttir.relu"(%2859, %2860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2862 = tensor.empty() : tensor<1x64xf32>
+    %2863 = "ttir.relu"(%2861, %2862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2864 = tensor.empty() : tensor<1x64xf32>
+    %2865 = "ttir.relu"(%2863, %2864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2866 = tensor.empty() : tensor<1x64xf32>
+    %2867 = "ttir.relu"(%2865, %2866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2868 = tensor.empty() : tensor<1x64xf32>
+    %2869 = "ttir.relu"(%2867, %2868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2870 = tensor.empty() : tensor<1x64xf32>
+    %2871 = "ttir.relu"(%2869, %2870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2872 = tensor.empty() : tensor<1x64xf32>
+    %2873 = "ttir.relu"(%2871, %2872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2874 = tensor.empty() : tensor<1x64xf32>
+    %2875 = "ttir.relu"(%2873, %2874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2876 = tensor.empty() : tensor<1x64xf32>
+    %2877 = "ttir.relu"(%2875, %2876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2878 = tensor.empty() : tensor<1x64xf32>
+    %2879 = "ttir.relu"(%2877, %2878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2880 = tensor.empty() : tensor<1x64xf32>
+    %2881 = "ttir.relu"(%2879, %2880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2882 = tensor.empty() : tensor<1x64xf32>
+    %2883 = "ttir.relu"(%2881, %2882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2884 = tensor.empty() : tensor<1x64xf32>
+    %2885 = "ttir.relu"(%2883, %2884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2886 = tensor.empty() : tensor<1x64xf32>
+    %2887 = "ttir.relu"(%2885, %2886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2888 = tensor.empty() : tensor<1x64xf32>
+    %2889 = "ttir.relu"(%2887, %2888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2890 = tensor.empty() : tensor<1x64xf32>
+    %2891 = "ttir.relu"(%2889, %2890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2892 = tensor.empty() : tensor<1x64xf32>
+    %2893 = "ttir.relu"(%2891, %2892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2894 = tensor.empty() : tensor<1x64xf32>
+    %2895 = "ttir.relu"(%2893, %2894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2896 = tensor.empty() : tensor<1x64xf32>
+    %2897 = "ttir.relu"(%2895, %2896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2898 = tensor.empty() : tensor<1x64xf32>
+    %2899 = "ttir.relu"(%2897, %2898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2900 = tensor.empty() : tensor<1x64xf32>
+    %2901 = "ttir.relu"(%2899, %2900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2902 = tensor.empty() : tensor<1x64xf32>
+    %2903 = "ttir.relu"(%2901, %2902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2904 = tensor.empty() : tensor<1x64xf32>
+    %2905 = "ttir.relu"(%2903, %2904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2906 = tensor.empty() : tensor<1x64xf32>
+    %2907 = "ttir.relu"(%2905, %2906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2908 = tensor.empty() : tensor<1x64xf32>
+    %2909 = "ttir.relu"(%2907, %2908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2910 = tensor.empty() : tensor<1x64xf32>
+    %2911 = "ttir.relu"(%2909, %2910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2912 = tensor.empty() : tensor<1x64xf32>
+    %2913 = "ttir.relu"(%2911, %2912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2914 = tensor.empty() : tensor<1x64xf32>
+    %2915 = "ttir.relu"(%2913, %2914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2916 = tensor.empty() : tensor<1x64xf32>
+    %2917 = "ttir.relu"(%2915, %2916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2918 = tensor.empty() : tensor<1x64xf32>
+    %2919 = "ttir.relu"(%2917, %2918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2920 = tensor.empty() : tensor<1x64xf32>
+    %2921 = "ttir.relu"(%2919, %2920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2922 = tensor.empty() : tensor<1x64xf32>
+    %2923 = "ttir.relu"(%2921, %2922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2924 = tensor.empty() : tensor<1x64xf32>
+    %2925 = "ttir.relu"(%2923, %2924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2926 = tensor.empty() : tensor<1x64xf32>
+    %2927 = "ttir.relu"(%2925, %2926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2928 = tensor.empty() : tensor<1x64xf32>
+    %2929 = "ttir.relu"(%2927, %2928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2930 = tensor.empty() : tensor<1x64xf32>
+    %2931 = "ttir.relu"(%2929, %2930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2932 = tensor.empty() : tensor<1x64xf32>
+    %2933 = "ttir.relu"(%2931, %2932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2934 = tensor.empty() : tensor<1x64xf32>
+    %2935 = "ttir.relu"(%2933, %2934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2936 = tensor.empty() : tensor<1x64xf32>
+    %2937 = "ttir.relu"(%2935, %2936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2938 = tensor.empty() : tensor<1x64xf32>
+    %2939 = "ttir.relu"(%2937, %2938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2940 = tensor.empty() : tensor<1x64xf32>
+    %2941 = "ttir.relu"(%2939, %2940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2942 = tensor.empty() : tensor<1x64xf32>
+    %2943 = "ttir.relu"(%2941, %2942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2944 = tensor.empty() : tensor<1x64xf32>
+    %2945 = "ttir.relu"(%2943, %2944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2946 = tensor.empty() : tensor<1x64xf32>
+    %2947 = "ttir.relu"(%2945, %2946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2948 = tensor.empty() : tensor<1x64xf32>
+    %2949 = "ttir.relu"(%2947, %2948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2950 = tensor.empty() : tensor<1x64xf32>
+    %2951 = "ttir.relu"(%2949, %2950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2952 = tensor.empty() : tensor<1x64xf32>
+    %2953 = "ttir.relu"(%2951, %2952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2954 = tensor.empty() : tensor<1x64xf32>
+    %2955 = "ttir.relu"(%2953, %2954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2956 = tensor.empty() : tensor<1x64xf32>
+    %2957 = "ttir.relu"(%2955, %2956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2958 = tensor.empty() : tensor<1x64xf32>
+    %2959 = "ttir.relu"(%2957, %2958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2960 = tensor.empty() : tensor<1x64xf32>
+    %2961 = "ttir.relu"(%2959, %2960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2962 = tensor.empty() : tensor<1x64xf32>
+    %2963 = "ttir.relu"(%2961, %2962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2964 = tensor.empty() : tensor<1x64xf32>
+    %2965 = "ttir.relu"(%2963, %2964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2966 = tensor.empty() : tensor<1x64xf32>
+    %2967 = "ttir.relu"(%2965, %2966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2968 = tensor.empty() : tensor<1x64xf32>
+    %2969 = "ttir.relu"(%2967, %2968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2970 = tensor.empty() : tensor<1x64xf32>
+    %2971 = "ttir.relu"(%2969, %2970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2972 = tensor.empty() : tensor<1x64xf32>
+    %2973 = "ttir.relu"(%2971, %2972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2974 = tensor.empty() : tensor<1x64xf32>
+    %2975 = "ttir.relu"(%2973, %2974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2976 = tensor.empty() : tensor<1x64xf32>
+    %2977 = "ttir.relu"(%2975, %2976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2978 = tensor.empty() : tensor<1x64xf32>
+    %2979 = "ttir.relu"(%2977, %2978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2980 = tensor.empty() : tensor<1x64xf32>
+    %2981 = "ttir.relu"(%2979, %2980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2982 = tensor.empty() : tensor<1x64xf32>
+    %2983 = "ttir.relu"(%2981, %2982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2984 = tensor.empty() : tensor<1x64xf32>
+    %2985 = "ttir.relu"(%2983, %2984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2986 = tensor.empty() : tensor<1x64xf32>
+    %2987 = "ttir.relu"(%2985, %2986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2988 = tensor.empty() : tensor<1x64xf32>
+    %2989 = "ttir.relu"(%2987, %2988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2990 = tensor.empty() : tensor<1x64xf32>
+    %2991 = "ttir.relu"(%2989, %2990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2992 = tensor.empty() : tensor<1x64xf32>
+    %2993 = "ttir.relu"(%2991, %2992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2994 = tensor.empty() : tensor<1x64xf32>
+    %2995 = "ttir.relu"(%2993, %2994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2996 = tensor.empty() : tensor<1x64xf32>
+    %2997 = "ttir.relu"(%2995, %2996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2998 = tensor.empty() : tensor<1x64xf32>
+    %2999 = "ttir.relu"(%2997, %2998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3000 = tensor.empty() : tensor<1x64xf32>
+    %3001 = "ttir.relu"(%2999, %3000) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3002 = tensor.empty() : tensor<1x64xf32>
+    %3003 = "ttir.relu"(%3001, %3002) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3004 = tensor.empty() : tensor<1x64xf32>
+    %3005 = "ttir.relu"(%3003, %3004) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3006 = tensor.empty() : tensor<1x64xf32>
+    %3007 = "ttir.relu"(%3005, %3006) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3008 = tensor.empty() : tensor<1x64xf32>
+    %3009 = "ttir.relu"(%3007, %3008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3010 = tensor.empty() : tensor<1x64xf32>
+    %3011 = "ttir.relu"(%3009, %3010) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3012 = tensor.empty() : tensor<1x64xf32>
+    %3013 = "ttir.relu"(%3011, %3012) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3014 = tensor.empty() : tensor<1x64xf32>
+    %3015 = "ttir.relu"(%3013, %3014) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3016 = tensor.empty() : tensor<1x64xf32>
+    %3017 = "ttir.relu"(%3015, %3016) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3018 = tensor.empty() : tensor<1x64xf32>
+    %3019 = "ttir.relu"(%3017, %3018) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3020 = tensor.empty() : tensor<1x64xf32>
+    %3021 = "ttir.relu"(%3019, %3020) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3022 = tensor.empty() : tensor<1x64xf32>
+    %3023 = "ttir.relu"(%3021, %3022) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3024 = tensor.empty() : tensor<1x64xf32>
+    %3025 = "ttir.relu"(%3023, %3024) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3026 = tensor.empty() : tensor<1x64xf32>
+    %3027 = "ttir.relu"(%3025, %3026) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3028 = tensor.empty() : tensor<1x64xf32>
+    %3029 = "ttir.relu"(%3027, %3028) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3030 = tensor.empty() : tensor<1x64xf32>
+    %3031 = "ttir.relu"(%3029, %3030) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3032 = tensor.empty() : tensor<1x64xf32>
+    %3033 = "ttir.relu"(%3031, %3032) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3034 = tensor.empty() : tensor<1x64xf32>
+    %3035 = "ttir.relu"(%3033, %3034) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3036 = tensor.empty() : tensor<1x64xf32>
+    %3037 = "ttir.relu"(%3035, %3036) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3038 = tensor.empty() : tensor<1x64xf32>
+    %3039 = "ttir.relu"(%3037, %3038) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3040 = tensor.empty() : tensor<1x64xf32>
+    %3041 = "ttir.relu"(%3039, %3040) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3042 = tensor.empty() : tensor<1x64xf32>
+    %3043 = "ttir.relu"(%3041, %3042) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3044 = tensor.empty() : tensor<1x64xf32>
+    %3045 = "ttir.relu"(%3043, %3044) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3046 = tensor.empty() : tensor<1x64xf32>
+    %3047 = "ttir.relu"(%3045, %3046) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3048 = tensor.empty() : tensor<1x64xf32>
+    %3049 = "ttir.relu"(%3047, %3048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3050 = tensor.empty() : tensor<1x64xf32>
+    %3051 = "ttir.relu"(%3049, %3050) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3052 = tensor.empty() : tensor<1x64xf32>
+    %3053 = "ttir.relu"(%3051, %3052) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3054 = tensor.empty() : tensor<1x64xf32>
+    %3055 = "ttir.relu"(%3053, %3054) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3056 = tensor.empty() : tensor<1x64xf32>
+    %3057 = "ttir.relu"(%3055, %3056) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3058 = tensor.empty() : tensor<1x64xf32>
+    %3059 = "ttir.relu"(%3057, %3058) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3060 = tensor.empty() : tensor<1x64xf32>
+    %3061 = "ttir.relu"(%3059, %3060) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3062 = tensor.empty() : tensor<1x64xf32>
+    %3063 = "ttir.relu"(%3061, %3062) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3064 = tensor.empty() : tensor<1x64xf32>
+    %3065 = "ttir.relu"(%3063, %3064) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3066 = tensor.empty() : tensor<1x64xf32>
+    %3067 = "ttir.relu"(%3065, %3066) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3068 = tensor.empty() : tensor<1x64xf32>
+    %3069 = "ttir.relu"(%3067, %3068) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3070 = tensor.empty() : tensor<1x64xf32>
+    %3071 = "ttir.relu"(%3069, %3070) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3072 = tensor.empty() : tensor<1x64xf32>
+    %3073 = "ttir.relu"(%3071, %3072) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3074 = tensor.empty() : tensor<1x64xf32>
+    %3075 = "ttir.relu"(%3073, %3074) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3076 = tensor.empty() : tensor<1x64xf32>
+    %3077 = "ttir.relu"(%3075, %3076) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3078 = tensor.empty() : tensor<1x64xf32>
+    %3079 = "ttir.relu"(%3077, %3078) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3080 = tensor.empty() : tensor<1x64xf32>
+    %3081 = "ttir.relu"(%3079, %3080) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3082 = tensor.empty() : tensor<1x64xf32>
+    %3083 = "ttir.relu"(%3081, %3082) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3084 = tensor.empty() : tensor<1x64xf32>
+    %3085 = "ttir.relu"(%3083, %3084) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3086 = tensor.empty() : tensor<1x64xf32>
+    %3087 = "ttir.relu"(%3085, %3086) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3088 = tensor.empty() : tensor<1x64xf32>
+    %3089 = "ttir.relu"(%3087, %3088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3090 = tensor.empty() : tensor<1x64xf32>
+    %3091 = "ttir.relu"(%3089, %3090) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3092 = tensor.empty() : tensor<1x64xf32>
+    %3093 = "ttir.relu"(%3091, %3092) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3094 = tensor.empty() : tensor<1x64xf32>
+    %3095 = "ttir.relu"(%3093, %3094) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3096 = tensor.empty() : tensor<1x64xf32>
+    %3097 = "ttir.relu"(%3095, %3096) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3098 = tensor.empty() : tensor<1x64xf32>
+    %3099 = "ttir.relu"(%3097, %3098) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3100 = tensor.empty() : tensor<1x64xf32>
+    %3101 = "ttir.relu"(%3099, %3100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3102 = tensor.empty() : tensor<1x64xf32>
+    %3103 = "ttir.relu"(%3101, %3102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3104 = tensor.empty() : tensor<1x64xf32>
+    %3105 = "ttir.relu"(%3103, %3104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3106 = tensor.empty() : tensor<1x64xf32>
+    %3107 = "ttir.relu"(%3105, %3106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3108 = tensor.empty() : tensor<1x64xf32>
+    %3109 = "ttir.relu"(%3107, %3108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3110 = tensor.empty() : tensor<1x64xf32>
+    %3111 = "ttir.relu"(%3109, %3110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3112 = tensor.empty() : tensor<1x64xf32>
+    %3113 = "ttir.relu"(%3111, %3112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3114 = tensor.empty() : tensor<1x64xf32>
+    %3115 = "ttir.relu"(%3113, %3114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3116 = tensor.empty() : tensor<1x64xf32>
+    %3117 = "ttir.relu"(%3115, %3116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3118 = tensor.empty() : tensor<1x64xf32>
+    %3119 = "ttir.relu"(%3117, %3118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3120 = tensor.empty() : tensor<1x64xf32>
+    %3121 = "ttir.relu"(%3119, %3120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3122 = tensor.empty() : tensor<1x64xf32>
+    %3123 = "ttir.relu"(%3121, %3122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3124 = tensor.empty() : tensor<1x64xf32>
+    %3125 = "ttir.relu"(%3123, %3124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3126 = tensor.empty() : tensor<1x64xf32>
+    %3127 = "ttir.relu"(%3125, %3126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3128 = tensor.empty() : tensor<1x64xf32>
+    %3129 = "ttir.relu"(%3127, %3128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3130 = tensor.empty() : tensor<1x64xf32>
+    %3131 = "ttir.relu"(%3129, %3130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3132 = tensor.empty() : tensor<1x64xf32>
+    %3133 = "ttir.relu"(%3131, %3132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3134 = tensor.empty() : tensor<1x64xf32>
+    %3135 = "ttir.relu"(%3133, %3134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3136 = tensor.empty() : tensor<1x64xf32>
+    %3137 = "ttir.relu"(%3135, %3136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3138 = tensor.empty() : tensor<1x64xf32>
+    %3139 = "ttir.relu"(%3137, %3138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3140 = tensor.empty() : tensor<1x64xf32>
+    %3141 = "ttir.relu"(%3139, %3140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3142 = tensor.empty() : tensor<1x64xf32>
+    %3143 = "ttir.relu"(%3141, %3142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3144 = tensor.empty() : tensor<1x64xf32>
+    %3145 = "ttir.relu"(%3143, %3144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3146 = tensor.empty() : tensor<1x64xf32>
+    %3147 = "ttir.relu"(%3145, %3146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3148 = tensor.empty() : tensor<1x64xf32>
+    %3149 = "ttir.relu"(%3147, %3148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3150 = tensor.empty() : tensor<1x64xf32>
+    %3151 = "ttir.relu"(%3149, %3150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3152 = tensor.empty() : tensor<1x64xf32>
+    %3153 = "ttir.relu"(%3151, %3152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3154 = tensor.empty() : tensor<1x64xf32>
+    %3155 = "ttir.relu"(%3153, %3154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3156 = tensor.empty() : tensor<1x64xf32>
+    %3157 = "ttir.relu"(%3155, %3156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3158 = tensor.empty() : tensor<1x64xf32>
+    %3159 = "ttir.relu"(%3157, %3158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3160 = tensor.empty() : tensor<1x64xf32>
+    %3161 = "ttir.relu"(%3159, %3160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3162 = tensor.empty() : tensor<1x64xf32>
+    %3163 = "ttir.relu"(%3161, %3162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3164 = tensor.empty() : tensor<1x64xf32>
+    %3165 = "ttir.relu"(%3163, %3164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3166 = tensor.empty() : tensor<1x64xf32>
+    %3167 = "ttir.relu"(%3165, %3166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3168 = tensor.empty() : tensor<1x64xf32>
+    %3169 = "ttir.relu"(%3167, %3168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3170 = tensor.empty() : tensor<1x64xf32>
+    %3171 = "ttir.relu"(%3169, %3170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3172 = tensor.empty() : tensor<1x64xf32>
+    %3173 = "ttir.relu"(%3171, %3172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3174 = tensor.empty() : tensor<1x64xf32>
+    %3175 = "ttir.relu"(%3173, %3174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3176 = tensor.empty() : tensor<1x64xf32>
+    %3177 = "ttir.relu"(%3175, %3176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3178 = tensor.empty() : tensor<1x64xf32>
+    %3179 = "ttir.relu"(%3177, %3178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3180 = tensor.empty() : tensor<1x64xf32>
+    %3181 = "ttir.relu"(%3179, %3180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3182 = tensor.empty() : tensor<1x64xf32>
+    %3183 = "ttir.relu"(%3181, %3182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3184 = tensor.empty() : tensor<1x64xf32>
+    %3185 = "ttir.relu"(%3183, %3184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3186 = tensor.empty() : tensor<1x64xf32>
+    %3187 = "ttir.relu"(%3185, %3186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3188 = tensor.empty() : tensor<1x64xf32>
+    %3189 = "ttir.relu"(%3187, %3188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3190 = tensor.empty() : tensor<1x64xf32>
+    %3191 = "ttir.relu"(%3189, %3190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3192 = tensor.empty() : tensor<1x64xf32>
+    %3193 = "ttir.relu"(%3191, %3192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3194 = tensor.empty() : tensor<1x64xf32>
+    %3195 = "ttir.relu"(%3193, %3194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3196 = tensor.empty() : tensor<1x64xf32>
+    %3197 = "ttir.relu"(%3195, %3196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3198 = tensor.empty() : tensor<1x64xf32>
+    %3199 = "ttir.relu"(%3197, %3198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3200 = tensor.empty() : tensor<1x64xf32>
+    %3201 = "ttir.relu"(%3199, %3200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3202 = tensor.empty() : tensor<1x64xf32>
+    %3203 = "ttir.relu"(%3201, %3202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3204 = tensor.empty() : tensor<1x64xf32>
+    %3205 = "ttir.relu"(%3203, %3204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3206 = tensor.empty() : tensor<1x64xf32>
+    %3207 = "ttir.relu"(%3205, %3206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3208 = tensor.empty() : tensor<1x64xf32>
+    %3209 = "ttir.relu"(%3207, %3208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3210 = tensor.empty() : tensor<1x64xf32>
+    %3211 = "ttir.relu"(%3209, %3210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3212 = tensor.empty() : tensor<1x64xf32>
+    %3213 = "ttir.relu"(%3211, %3212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3214 = tensor.empty() : tensor<1x64xf32>
+    %3215 = "ttir.relu"(%3213, %3214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3216 = tensor.empty() : tensor<1x64xf32>
+    %3217 = "ttir.relu"(%3215, %3216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3218 = tensor.empty() : tensor<1x64xf32>
+    %3219 = "ttir.relu"(%3217, %3218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3220 = tensor.empty() : tensor<1x64xf32>
+    %3221 = "ttir.relu"(%3219, %3220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3222 = tensor.empty() : tensor<1x64xf32>
+    %3223 = "ttir.relu"(%3221, %3222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3224 = tensor.empty() : tensor<1x64xf32>
+    %3225 = "ttir.relu"(%3223, %3224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3226 = tensor.empty() : tensor<1x64xf32>
+    %3227 = "ttir.relu"(%3225, %3226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3228 = tensor.empty() : tensor<1x64xf32>
+    %3229 = "ttir.relu"(%3227, %3228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3230 = tensor.empty() : tensor<1x64xf32>
+    %3231 = "ttir.relu"(%3229, %3230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3232 = tensor.empty() : tensor<1x64xf32>
+    %3233 = "ttir.relu"(%3231, %3232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3234 = tensor.empty() : tensor<1x64xf32>
+    %3235 = "ttir.relu"(%3233, %3234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3236 = tensor.empty() : tensor<1x64xf32>
+    %3237 = "ttir.relu"(%3235, %3236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3238 = tensor.empty() : tensor<1x64xf32>
+    %3239 = "ttir.relu"(%3237, %3238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3240 = tensor.empty() : tensor<1x64xf32>
+    %3241 = "ttir.relu"(%3239, %3240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3242 = tensor.empty() : tensor<1x64xf32>
+    %3243 = "ttir.relu"(%3241, %3242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3244 = tensor.empty() : tensor<1x64xf32>
+    %3245 = "ttir.relu"(%3243, %3244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3246 = tensor.empty() : tensor<1x64xf32>
+    %3247 = "ttir.relu"(%3245, %3246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3248 = tensor.empty() : tensor<1x64xf32>
+    %3249 = "ttir.relu"(%3247, %3248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3250 = tensor.empty() : tensor<1x64xf32>
+    %3251 = "ttir.relu"(%3249, %3250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3252 = tensor.empty() : tensor<1x64xf32>
+    %3253 = "ttir.relu"(%3251, %3252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3254 = tensor.empty() : tensor<1x64xf32>
+    %3255 = "ttir.relu"(%3253, %3254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3256 = tensor.empty() : tensor<1x64xf32>
+    %3257 = "ttir.relu"(%3255, %3256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3258 = tensor.empty() : tensor<1x64xf32>
+    %3259 = "ttir.relu"(%3257, %3258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3260 = tensor.empty() : tensor<1x64xf32>
+    %3261 = "ttir.relu"(%3259, %3260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3262 = tensor.empty() : tensor<1x64xf32>
+    %3263 = "ttir.relu"(%3261, %3262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3264 = tensor.empty() : tensor<1x64xf32>
+    %3265 = "ttir.relu"(%3263, %3264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3266 = tensor.empty() : tensor<1x64xf32>
+    %3267 = "ttir.relu"(%3265, %3266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3268 = tensor.empty() : tensor<1x64xf32>
+    %3269 = "ttir.relu"(%3267, %3268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3270 = tensor.empty() : tensor<1x64xf32>
+    %3271 = "ttir.relu"(%3269, %3270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3272 = tensor.empty() : tensor<1x64xf32>
+    %3273 = "ttir.relu"(%3271, %3272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3274 = tensor.empty() : tensor<1x64xf32>
+    %3275 = "ttir.relu"(%3273, %3274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3276 = tensor.empty() : tensor<1x64xf32>
+    %3277 = "ttir.relu"(%3275, %3276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3278 = tensor.empty() : tensor<1x64xf32>
+    %3279 = "ttir.relu"(%3277, %3278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3280 = tensor.empty() : tensor<1x64xf32>
+    %3281 = "ttir.relu"(%3279, %3280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3282 = tensor.empty() : tensor<1x64xf32>
+    %3283 = "ttir.relu"(%3281, %3282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3284 = tensor.empty() : tensor<1x64xf32>
+    %3285 = "ttir.relu"(%3283, %3284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3286 = tensor.empty() : tensor<1x64xf32>
+    %3287 = "ttir.relu"(%3285, %3286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3288 = tensor.empty() : tensor<1x64xf32>
+    %3289 = "ttir.relu"(%3287, %3288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3290 = tensor.empty() : tensor<1x64xf32>
+    %3291 = "ttir.relu"(%3289, %3290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3292 = tensor.empty() : tensor<1x64xf32>
+    %3293 = "ttir.relu"(%3291, %3292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3294 = tensor.empty() : tensor<1x64xf32>
+    %3295 = "ttir.relu"(%3293, %3294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3296 = tensor.empty() : tensor<1x64xf32>
+    %3297 = "ttir.relu"(%3295, %3296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3298 = tensor.empty() : tensor<1x64xf32>
+    %3299 = "ttir.relu"(%3297, %3298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3300 = tensor.empty() : tensor<1x64xf32>
+    %3301 = "ttir.relu"(%3299, %3300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3302 = tensor.empty() : tensor<1x64xf32>
+    %3303 = "ttir.relu"(%3301, %3302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3304 = tensor.empty() : tensor<1x64xf32>
+    %3305 = "ttir.relu"(%3303, %3304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3306 = tensor.empty() : tensor<1x64xf32>
+    %3307 = "ttir.relu"(%3305, %3306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3308 = tensor.empty() : tensor<1x64xf32>
+    %3309 = "ttir.relu"(%3307, %3308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3310 = tensor.empty() : tensor<1x64xf32>
+    %3311 = "ttir.relu"(%3309, %3310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3312 = tensor.empty() : tensor<1x64xf32>
+    %3313 = "ttir.relu"(%3311, %3312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3314 = tensor.empty() : tensor<1x64xf32>
+    %3315 = "ttir.relu"(%3313, %3314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3316 = tensor.empty() : tensor<1x64xf32>
+    %3317 = "ttir.relu"(%3315, %3316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3318 = tensor.empty() : tensor<1x64xf32>
+    %3319 = "ttir.relu"(%3317, %3318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3320 = tensor.empty() : tensor<1x64xf32>
+    %3321 = "ttir.relu"(%3319, %3320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3322 = tensor.empty() : tensor<1x64xf32>
+    %3323 = "ttir.relu"(%3321, %3322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3324 = tensor.empty() : tensor<1x64xf32>
+    %3325 = "ttir.relu"(%3323, %3324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3326 = tensor.empty() : tensor<1x64xf32>
+    %3327 = "ttir.relu"(%3325, %3326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3328 = tensor.empty() : tensor<1x64xf32>
+    %3329 = "ttir.relu"(%3327, %3328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3330 = tensor.empty() : tensor<1x64xf32>
+    %3331 = "ttir.relu"(%3329, %3330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3332 = tensor.empty() : tensor<1x64xf32>
+    %3333 = "ttir.relu"(%3331, %3332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3334 = tensor.empty() : tensor<1x64xf32>
+    %3335 = "ttir.relu"(%3333, %3334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3336 = tensor.empty() : tensor<1x64xf32>
+    %3337 = "ttir.relu"(%3335, %3336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3338 = tensor.empty() : tensor<1x64xf32>
+    %3339 = "ttir.relu"(%3337, %3338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3340 = tensor.empty() : tensor<1x64xf32>
+    %3341 = "ttir.relu"(%3339, %3340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3342 = tensor.empty() : tensor<1x64xf32>
+    %3343 = "ttir.relu"(%3341, %3342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3344 = tensor.empty() : tensor<1x64xf32>
+    %3345 = "ttir.relu"(%3343, %3344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3346 = tensor.empty() : tensor<1x64xf32>
+    %3347 = "ttir.relu"(%3345, %3346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3348 = tensor.empty() : tensor<1x64xf32>
+    %3349 = "ttir.relu"(%3347, %3348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3350 = tensor.empty() : tensor<1x64xf32>
+    %3351 = "ttir.relu"(%3349, %3350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3352 = tensor.empty() : tensor<1x64xf32>
+    %3353 = "ttir.relu"(%3351, %3352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3354 = tensor.empty() : tensor<1x64xf32>
+    %3355 = "ttir.relu"(%3353, %3354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3356 = tensor.empty() : tensor<1x64xf32>
+    %3357 = "ttir.relu"(%3355, %3356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3358 = tensor.empty() : tensor<1x64xf32>
+    %3359 = "ttir.relu"(%3357, %3358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3360 = tensor.empty() : tensor<1x64xf32>
+    %3361 = "ttir.relu"(%3359, %3360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3362 = tensor.empty() : tensor<1x64xf32>
+    %3363 = "ttir.relu"(%3361, %3362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3364 = tensor.empty() : tensor<1x64xf32>
+    %3365 = "ttir.relu"(%3363, %3364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3366 = tensor.empty() : tensor<1x64xf32>
+    %3367 = "ttir.relu"(%3365, %3366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3368 = tensor.empty() : tensor<1x64xf32>
+    %3369 = "ttir.relu"(%3367, %3368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3370 = tensor.empty() : tensor<1x64xf32>
+    %3371 = "ttir.relu"(%3369, %3370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3372 = tensor.empty() : tensor<1x64xf32>
+    %3373 = "ttir.relu"(%3371, %3372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3374 = tensor.empty() : tensor<1x64xf32>
+    %3375 = "ttir.relu"(%3373, %3374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3376 = tensor.empty() : tensor<1x64xf32>
+    %3377 = "ttir.relu"(%3375, %3376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3378 = tensor.empty() : tensor<1x64xf32>
+    %3379 = "ttir.relu"(%3377, %3378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3380 = tensor.empty() : tensor<1x64xf32>
+    %3381 = "ttir.relu"(%3379, %3380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3382 = tensor.empty() : tensor<1x64xf32>
+    %3383 = "ttir.relu"(%3381, %3382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3384 = tensor.empty() : tensor<1x64xf32>
+    %3385 = "ttir.relu"(%3383, %3384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3386 = tensor.empty() : tensor<1x64xf32>
+    %3387 = "ttir.relu"(%3385, %3386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3388 = tensor.empty() : tensor<1x64xf32>
+    %3389 = "ttir.relu"(%3387, %3388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3390 = tensor.empty() : tensor<1x64xf32>
+    %3391 = "ttir.relu"(%3389, %3390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3392 = tensor.empty() : tensor<1x64xf32>
+    %3393 = "ttir.relu"(%3391, %3392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3394 = tensor.empty() : tensor<1x64xf32>
+    %3395 = "ttir.relu"(%3393, %3394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3396 = tensor.empty() : tensor<1x64xf32>
+    %3397 = "ttir.relu"(%3395, %3396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3398 = tensor.empty() : tensor<1x64xf32>
+    %3399 = "ttir.relu"(%3397, %3398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3400 = tensor.empty() : tensor<1x64xf32>
+    %3401 = "ttir.relu"(%3399, %3400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3402 = tensor.empty() : tensor<1x64xf32>
+    %3403 = "ttir.relu"(%3401, %3402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3404 = tensor.empty() : tensor<1x64xf32>
+    %3405 = "ttir.relu"(%3403, %3404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3406 = tensor.empty() : tensor<1x64xf32>
+    %3407 = "ttir.relu"(%3405, %3406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3408 = tensor.empty() : tensor<1x64xf32>
+    %3409 = "ttir.relu"(%3407, %3408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3410 = tensor.empty() : tensor<1x64xf32>
+    %3411 = "ttir.relu"(%3409, %3410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3412 = tensor.empty() : tensor<1x64xf32>
+    %3413 = "ttir.relu"(%3411, %3412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3414 = tensor.empty() : tensor<1x64xf32>
+    %3415 = "ttir.relu"(%3413, %3414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3416 = tensor.empty() : tensor<1x64xf32>
+    %3417 = "ttir.relu"(%3415, %3416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3418 = tensor.empty() : tensor<1x64xf32>
+    %3419 = "ttir.relu"(%3417, %3418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3420 = tensor.empty() : tensor<1x64xf32>
+    %3421 = "ttir.relu"(%3419, %3420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3422 = tensor.empty() : tensor<1x64xf32>
+    %3423 = "ttir.relu"(%3421, %3422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3424 = tensor.empty() : tensor<1x64xf32>
+    %3425 = "ttir.relu"(%3423, %3424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3426 = tensor.empty() : tensor<1x64xf32>
+    %3427 = "ttir.relu"(%3425, %3426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3428 = tensor.empty() : tensor<1x64xf32>
+    %3429 = "ttir.relu"(%3427, %3428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3430 = tensor.empty() : tensor<1x64xf32>
+    %3431 = "ttir.relu"(%3429, %3430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3432 = tensor.empty() : tensor<1x64xf32>
+    %3433 = "ttir.relu"(%3431, %3432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3434 = tensor.empty() : tensor<1x64xf32>
+    %3435 = "ttir.relu"(%3433, %3434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3436 = tensor.empty() : tensor<1x64xf32>
+    %3437 = "ttir.relu"(%3435, %3436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3438 = tensor.empty() : tensor<1x64xf32>
+    %3439 = "ttir.relu"(%3437, %3438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3440 = tensor.empty() : tensor<1x64xf32>
+    %3441 = "ttir.relu"(%3439, %3440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3442 = tensor.empty() : tensor<1x64xf32>
+    %3443 = "ttir.relu"(%3441, %3442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3444 = tensor.empty() : tensor<1x64xf32>
+    %3445 = "ttir.relu"(%3443, %3444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3446 = tensor.empty() : tensor<1x64xf32>
+    %3447 = "ttir.relu"(%3445, %3446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3448 = tensor.empty() : tensor<1x64xf32>
+    %3449 = "ttir.relu"(%3447, %3448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3450 = tensor.empty() : tensor<1x64xf32>
+    %3451 = "ttir.relu"(%3449, %3450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3452 = tensor.empty() : tensor<1x64xf32>
+    %3453 = "ttir.relu"(%3451, %3452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3454 = tensor.empty() : tensor<1x64xf32>
+    %3455 = "ttir.relu"(%3453, %3454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3456 = tensor.empty() : tensor<1x64xf32>
+    %3457 = "ttir.relu"(%3455, %3456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3458 = tensor.empty() : tensor<1x64xf32>
+    %3459 = "ttir.relu"(%3457, %3458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3460 = tensor.empty() : tensor<1x64xf32>
+    %3461 = "ttir.relu"(%3459, %3460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3462 = tensor.empty() : tensor<1x64xf32>
+    %3463 = "ttir.relu"(%3461, %3462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3464 = tensor.empty() : tensor<1x64xf32>
+    %3465 = "ttir.relu"(%3463, %3464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3466 = tensor.empty() : tensor<1x64xf32>
+    %3467 = "ttir.relu"(%3465, %3466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3468 = tensor.empty() : tensor<1x64xf32>
+    %3469 = "ttir.relu"(%3467, %3468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3470 = tensor.empty() : tensor<1x64xf32>
+    %3471 = "ttir.relu"(%3469, %3470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3472 = tensor.empty() : tensor<1x64xf32>
+    %3473 = "ttir.relu"(%3471, %3472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3474 = tensor.empty() : tensor<1x64xf32>
+    %3475 = "ttir.relu"(%3473, %3474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3476 = tensor.empty() : tensor<1x64xf32>
+    %3477 = "ttir.relu"(%3475, %3476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3478 = tensor.empty() : tensor<1x64xf32>
+    %3479 = "ttir.relu"(%3477, %3478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3480 = tensor.empty() : tensor<1x64xf32>
+    %3481 = "ttir.relu"(%3479, %3480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3482 = tensor.empty() : tensor<1x64xf32>
+    %3483 = "ttir.relu"(%3481, %3482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3484 = tensor.empty() : tensor<1x64xf32>
+    %3485 = "ttir.relu"(%3483, %3484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3486 = tensor.empty() : tensor<1x64xf32>
+    %3487 = "ttir.relu"(%3485, %3486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3488 = tensor.empty() : tensor<1x64xf32>
+    %3489 = "ttir.relu"(%3487, %3488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3490 = tensor.empty() : tensor<1x64xf32>
+    %3491 = "ttir.relu"(%3489, %3490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3492 = tensor.empty() : tensor<1x64xf32>
+    %3493 = "ttir.relu"(%3491, %3492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3494 = tensor.empty() : tensor<1x64xf32>
+    %3495 = "ttir.relu"(%3493, %3494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3496 = tensor.empty() : tensor<1x64xf32>
+    %3497 = "ttir.relu"(%3495, %3496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3498 = tensor.empty() : tensor<1x64xf32>
+    %3499 = "ttir.relu"(%3497, %3498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3500 = tensor.empty() : tensor<1x64xf32>
+    %3501 = "ttir.relu"(%3499, %3500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3502 = tensor.empty() : tensor<1x64xf32>
+    %3503 = "ttir.relu"(%3501, %3502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3504 = tensor.empty() : tensor<1x64xf32>
+    %3505 = "ttir.relu"(%3503, %3504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3506 = tensor.empty() : tensor<1x64xf32>
+    %3507 = "ttir.relu"(%3505, %3506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3508 = tensor.empty() : tensor<1x64xf32>
+    %3509 = "ttir.relu"(%3507, %3508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3510 = tensor.empty() : tensor<1x64xf32>
+    %3511 = "ttir.relu"(%3509, %3510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3512 = tensor.empty() : tensor<1x64xf32>
+    %3513 = "ttir.relu"(%3511, %3512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3514 = tensor.empty() : tensor<1x64xf32>
+    %3515 = "ttir.relu"(%3513, %3514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3516 = tensor.empty() : tensor<1x64xf32>
+    %3517 = "ttir.relu"(%3515, %3516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3518 = tensor.empty() : tensor<1x64xf32>
+    %3519 = "ttir.relu"(%3517, %3518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3520 = tensor.empty() : tensor<1x64xf32>
+    %3521 = "ttir.relu"(%3519, %3520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3522 = tensor.empty() : tensor<1x64xf32>
+    %3523 = "ttir.relu"(%3521, %3522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3524 = tensor.empty() : tensor<1x64xf32>
+    %3525 = "ttir.relu"(%3523, %3524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3526 = tensor.empty() : tensor<1x64xf32>
+    %3527 = "ttir.relu"(%3525, %3526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3528 = tensor.empty() : tensor<1x64xf32>
+    %3529 = "ttir.relu"(%3527, %3528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3530 = tensor.empty() : tensor<1x64xf32>
+    %3531 = "ttir.relu"(%3529, %3530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3532 = tensor.empty() : tensor<1x64xf32>
+    %3533 = "ttir.relu"(%3531, %3532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3534 = tensor.empty() : tensor<1x64xf32>
+    %3535 = "ttir.relu"(%3533, %3534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3536 = tensor.empty() : tensor<1x64xf32>
+    %3537 = "ttir.relu"(%3535, %3536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3538 = tensor.empty() : tensor<1x64xf32>
+    %3539 = "ttir.relu"(%3537, %3538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3540 = tensor.empty() : tensor<1x64xf32>
+    %3541 = "ttir.relu"(%3539, %3540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3542 = tensor.empty() : tensor<1x64xf32>
+    %3543 = "ttir.relu"(%3541, %3542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3544 = tensor.empty() : tensor<1x64xf32>
+    %3545 = "ttir.relu"(%3543, %3544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3546 = tensor.empty() : tensor<1x64xf32>
+    %3547 = "ttir.relu"(%3545, %3546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3548 = tensor.empty() : tensor<1x64xf32>
+    %3549 = "ttir.relu"(%3547, %3548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3550 = tensor.empty() : tensor<1x64xf32>
+    %3551 = "ttir.relu"(%3549, %3550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3552 = tensor.empty() : tensor<1x64xf32>
+    %3553 = "ttir.relu"(%3551, %3552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3554 = tensor.empty() : tensor<1x64xf32>
+    %3555 = "ttir.relu"(%3553, %3554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3556 = tensor.empty() : tensor<1x64xf32>
+    %3557 = "ttir.relu"(%3555, %3556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3558 = tensor.empty() : tensor<1x64xf32>
+    %3559 = "ttir.relu"(%3557, %3558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3560 = tensor.empty() : tensor<1x64xf32>
+    %3561 = "ttir.relu"(%3559, %3560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3562 = tensor.empty() : tensor<1x64xf32>
+    %3563 = "ttir.relu"(%3561, %3562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3564 = tensor.empty() : tensor<1x64xf32>
+    %3565 = "ttir.relu"(%3563, %3564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3566 = tensor.empty() : tensor<1x64xf32>
+    %3567 = "ttir.relu"(%3565, %3566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3568 = tensor.empty() : tensor<1x64xf32>
+    %3569 = "ttir.relu"(%3567, %3568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3570 = tensor.empty() : tensor<1x64xf32>
+    %3571 = "ttir.relu"(%3569, %3570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3572 = tensor.empty() : tensor<1x64xf32>
+    %3573 = "ttir.relu"(%3571, %3572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3574 = tensor.empty() : tensor<1x64xf32>
+    %3575 = "ttir.relu"(%3573, %3574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3576 = tensor.empty() : tensor<1x64xf32>
+    %3577 = "ttir.relu"(%3575, %3576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3578 = tensor.empty() : tensor<1x64xf32>
+    %3579 = "ttir.relu"(%3577, %3578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3580 = tensor.empty() : tensor<1x64xf32>
+    %3581 = "ttir.relu"(%3579, %3580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3582 = tensor.empty() : tensor<1x64xf32>
+    %3583 = "ttir.relu"(%3581, %3582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3584 = tensor.empty() : tensor<1x64xf32>
+    %3585 = "ttir.relu"(%3583, %3584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3586 = tensor.empty() : tensor<1x64xf32>
+    %3587 = "ttir.relu"(%3585, %3586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3588 = tensor.empty() : tensor<1x64xf32>
+    %3589 = "ttir.relu"(%3587, %3588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3590 = tensor.empty() : tensor<1x64xf32>
+    %3591 = "ttir.relu"(%3589, %3590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3592 = tensor.empty() : tensor<1x64xf32>
+    %3593 = "ttir.relu"(%3591, %3592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3594 = tensor.empty() : tensor<1x64xf32>
+    %3595 = "ttir.relu"(%3593, %3594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3596 = tensor.empty() : tensor<1x64xf32>
+    %3597 = "ttir.relu"(%3595, %3596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3598 = tensor.empty() : tensor<1x64xf32>
+    %3599 = "ttir.relu"(%3597, %3598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3600 = tensor.empty() : tensor<1x64xf32>
+    %3601 = "ttir.relu"(%3599, %3600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3602 = tensor.empty() : tensor<1x64xf32>
+    %3603 = "ttir.relu"(%3601, %3602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3604 = tensor.empty() : tensor<1x64xf32>
+    %3605 = "ttir.relu"(%3603, %3604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3606 = tensor.empty() : tensor<1x64xf32>
+    %3607 = "ttir.relu"(%3605, %3606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3608 = tensor.empty() : tensor<1x64xf32>
+    %3609 = "ttir.relu"(%3607, %3608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3610 = tensor.empty() : tensor<1x64xf32>
+    %3611 = "ttir.relu"(%3609, %3610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3612 = tensor.empty() : tensor<1x64xf32>
+    %3613 = "ttir.relu"(%3611, %3612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3614 = tensor.empty() : tensor<1x64xf32>
+    %3615 = "ttir.relu"(%3613, %3614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3616 = tensor.empty() : tensor<1x64xf32>
+    %3617 = "ttir.relu"(%3615, %3616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3618 = tensor.empty() : tensor<1x64xf32>
+    %3619 = "ttir.relu"(%3617, %3618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3620 = tensor.empty() : tensor<1x64xf32>
+    %3621 = "ttir.relu"(%3619, %3620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3622 = tensor.empty() : tensor<1x64xf32>
+    %3623 = "ttir.relu"(%3621, %3622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3624 = tensor.empty() : tensor<1x64xf32>
+    %3625 = "ttir.relu"(%3623, %3624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3626 = tensor.empty() : tensor<1x64xf32>
+    %3627 = "ttir.relu"(%3625, %3626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3628 = tensor.empty() : tensor<1x64xf32>
+    %3629 = "ttir.relu"(%3627, %3628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3630 = tensor.empty() : tensor<1x64xf32>
+    %3631 = "ttir.relu"(%3629, %3630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3632 = tensor.empty() : tensor<1x64xf32>
+    %3633 = "ttir.relu"(%3631, %3632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3634 = tensor.empty() : tensor<1x64xf32>
+    %3635 = "ttir.relu"(%3633, %3634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3636 = tensor.empty() : tensor<1x64xf32>
+    %3637 = "ttir.relu"(%3635, %3636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3638 = tensor.empty() : tensor<1x64xf32>
+    %3639 = "ttir.relu"(%3637, %3638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3640 = tensor.empty() : tensor<1x64xf32>
+    %3641 = "ttir.relu"(%3639, %3640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3642 = tensor.empty() : tensor<1x64xf32>
+    %3643 = "ttir.relu"(%3641, %3642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3644 = tensor.empty() : tensor<1x64xf32>
+    %3645 = "ttir.relu"(%3643, %3644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3646 = tensor.empty() : tensor<1x64xf32>
+    %3647 = "ttir.relu"(%3645, %3646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3648 = tensor.empty() : tensor<1x64xf32>
+    %3649 = "ttir.relu"(%3647, %3648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3650 = tensor.empty() : tensor<1x64xf32>
+    %3651 = "ttir.relu"(%3649, %3650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3652 = tensor.empty() : tensor<1x64xf32>
+    %3653 = "ttir.relu"(%3651, %3652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3654 = tensor.empty() : tensor<1x64xf32>
+    %3655 = "ttir.relu"(%3653, %3654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3656 = tensor.empty() : tensor<1x64xf32>
+    %3657 = "ttir.relu"(%3655, %3656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3658 = tensor.empty() : tensor<1x64xf32>
+    %3659 = "ttir.relu"(%3657, %3658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3660 = tensor.empty() : tensor<1x64xf32>
+    %3661 = "ttir.relu"(%3659, %3660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3662 = tensor.empty() : tensor<1x64xf32>
+    %3663 = "ttir.relu"(%3661, %3662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3664 = tensor.empty() : tensor<1x64xf32>
+    %3665 = "ttir.relu"(%3663, %3664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3666 = tensor.empty() : tensor<1x64xf32>
+    %3667 = "ttir.relu"(%3665, %3666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3668 = tensor.empty() : tensor<1x64xf32>
+    %3669 = "ttir.relu"(%3667, %3668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3670 = tensor.empty() : tensor<1x64xf32>
+    %3671 = "ttir.relu"(%3669, %3670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3672 = tensor.empty() : tensor<1x64xf32>
+    %3673 = "ttir.relu"(%3671, %3672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3674 = tensor.empty() : tensor<1x64xf32>
+    %3675 = "ttir.relu"(%3673, %3674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3676 = tensor.empty() : tensor<1x64xf32>
+    %3677 = "ttir.relu"(%3675, %3676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3678 = tensor.empty() : tensor<1x64xf32>
+    %3679 = "ttir.relu"(%3677, %3678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3680 = tensor.empty() : tensor<1x64xf32>
+    %3681 = "ttir.relu"(%3679, %3680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3682 = tensor.empty() : tensor<1x64xf32>
+    %3683 = "ttir.relu"(%3681, %3682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3684 = tensor.empty() : tensor<1x64xf32>
+    %3685 = "ttir.relu"(%3683, %3684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3686 = tensor.empty() : tensor<1x64xf32>
+    %3687 = "ttir.relu"(%3685, %3686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3688 = tensor.empty() : tensor<1x64xf32>
+    %3689 = "ttir.relu"(%3687, %3688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3690 = tensor.empty() : tensor<1x64xf32>
+    %3691 = "ttir.relu"(%3689, %3690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3692 = tensor.empty() : tensor<1x64xf32>
+    %3693 = "ttir.relu"(%3691, %3692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3694 = tensor.empty() : tensor<1x64xf32>
+    %3695 = "ttir.relu"(%3693, %3694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3696 = tensor.empty() : tensor<1x64xf32>
+    %3697 = "ttir.relu"(%3695, %3696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3698 = tensor.empty() : tensor<1x64xf32>
+    %3699 = "ttir.relu"(%3697, %3698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3700 = tensor.empty() : tensor<1x64xf32>
+    %3701 = "ttir.relu"(%3699, %3700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3702 = tensor.empty() : tensor<1x64xf32>
+    %3703 = "ttir.relu"(%3701, %3702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3704 = tensor.empty() : tensor<1x64xf32>
+    %3705 = "ttir.relu"(%3703, %3704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3706 = tensor.empty() : tensor<1x64xf32>
+    %3707 = "ttir.relu"(%3705, %3706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3708 = tensor.empty() : tensor<1x64xf32>
+    %3709 = "ttir.relu"(%3707, %3708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3710 = tensor.empty() : tensor<1x64xf32>
+    %3711 = "ttir.relu"(%3709, %3710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3712 = tensor.empty() : tensor<1x64xf32>
+    %3713 = "ttir.relu"(%3711, %3712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3714 = tensor.empty() : tensor<1x64xf32>
+    %3715 = "ttir.relu"(%3713, %3714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3716 = tensor.empty() : tensor<1x64xf32>
+    %3717 = "ttir.relu"(%3715, %3716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3718 = tensor.empty() : tensor<1x64xf32>
+    %3719 = "ttir.relu"(%3717, %3718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3720 = tensor.empty() : tensor<1x64xf32>
+    %3721 = "ttir.relu"(%3719, %3720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3722 = tensor.empty() : tensor<1x64xf32>
+    %3723 = "ttir.relu"(%3721, %3722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3724 = tensor.empty() : tensor<1x64xf32>
+    %3725 = "ttir.relu"(%3723, %3724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3726 = tensor.empty() : tensor<1x64xf32>
+    %3727 = "ttir.relu"(%3725, %3726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3728 = tensor.empty() : tensor<1x64xf32>
+    %3729 = "ttir.relu"(%3727, %3728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3730 = tensor.empty() : tensor<1x64xf32>
+    %3731 = "ttir.relu"(%3729, %3730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3732 = tensor.empty() : tensor<1x64xf32>
+    %3733 = "ttir.relu"(%3731, %3732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3734 = tensor.empty() : tensor<1x64xf32>
+    %3735 = "ttir.relu"(%3733, %3734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3736 = tensor.empty() : tensor<1x64xf32>
+    %3737 = "ttir.relu"(%3735, %3736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3738 = tensor.empty() : tensor<1x64xf32>
+    %3739 = "ttir.relu"(%3737, %3738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3740 = tensor.empty() : tensor<1x64xf32>
+    %3741 = "ttir.relu"(%3739, %3740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3742 = tensor.empty() : tensor<1x64xf32>
+    %3743 = "ttir.relu"(%3741, %3742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3744 = tensor.empty() : tensor<1x64xf32>
+    %3745 = "ttir.relu"(%3743, %3744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3746 = tensor.empty() : tensor<1x64xf32>
+    %3747 = "ttir.relu"(%3745, %3746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3748 = tensor.empty() : tensor<1x64xf32>
+    %3749 = "ttir.relu"(%3747, %3748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3750 = tensor.empty() : tensor<1x64xf32>
+    %3751 = "ttir.relu"(%3749, %3750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3752 = tensor.empty() : tensor<1x64xf32>
+    %3753 = "ttir.relu"(%3751, %3752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3754 = tensor.empty() : tensor<1x64xf32>
+    %3755 = "ttir.relu"(%3753, %3754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3756 = tensor.empty() : tensor<1x64xf32>
+    %3757 = "ttir.relu"(%3755, %3756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3758 = tensor.empty() : tensor<1x64xf32>
+    %3759 = "ttir.relu"(%3757, %3758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3760 = tensor.empty() : tensor<1x64xf32>
+    %3761 = "ttir.relu"(%3759, %3760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3762 = tensor.empty() : tensor<1x64xf32>
+    %3763 = "ttir.relu"(%3761, %3762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3764 = tensor.empty() : tensor<1x64xf32>
+    %3765 = "ttir.relu"(%3763, %3764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3766 = tensor.empty() : tensor<1x64xf32>
+    %3767 = "ttir.relu"(%3765, %3766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3768 = tensor.empty() : tensor<1x64xf32>
+    %3769 = "ttir.relu"(%3767, %3768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3770 = tensor.empty() : tensor<1x64xf32>
+    %3771 = "ttir.relu"(%3769, %3770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3772 = tensor.empty() : tensor<1x64xf32>
+    %3773 = "ttir.relu"(%3771, %3772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3774 = tensor.empty() : tensor<1x64xf32>
+    %3775 = "ttir.relu"(%3773, %3774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3776 = tensor.empty() : tensor<1x64xf32>
+    %3777 = "ttir.relu"(%3775, %3776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3778 = tensor.empty() : tensor<1x64xf32>
+    %3779 = "ttir.relu"(%3777, %3778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3780 = tensor.empty() : tensor<1x64xf32>
+    %3781 = "ttir.relu"(%3779, %3780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3782 = tensor.empty() : tensor<1x64xf32>
+    %3783 = "ttir.relu"(%3781, %3782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3784 = tensor.empty() : tensor<1x64xf32>
+    %3785 = "ttir.relu"(%3783, %3784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3786 = tensor.empty() : tensor<1x64xf32>
+    %3787 = "ttir.relu"(%3785, %3786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3788 = tensor.empty() : tensor<1x64xf32>
+    %3789 = "ttir.relu"(%3787, %3788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3790 = tensor.empty() : tensor<1x64xf32>
+    %3791 = "ttir.relu"(%3789, %3790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3792 = tensor.empty() : tensor<1x64xf32>
+    %3793 = "ttir.relu"(%3791, %3792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3794 = tensor.empty() : tensor<1x64xf32>
+    %3795 = "ttir.relu"(%3793, %3794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3796 = tensor.empty() : tensor<1x64xf32>
+    %3797 = "ttir.relu"(%3795, %3796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3798 = tensor.empty() : tensor<1x64xf32>
+    %3799 = "ttir.relu"(%3797, %3798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3800 = tensor.empty() : tensor<1x64xf32>
+    %3801 = "ttir.relu"(%3799, %3800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3802 = tensor.empty() : tensor<1x64xf32>
+    %3803 = "ttir.relu"(%3801, %3802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3804 = tensor.empty() : tensor<1x64xf32>
+    %3805 = "ttir.relu"(%3803, %3804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3806 = tensor.empty() : tensor<1x64xf32>
+    %3807 = "ttir.relu"(%3805, %3806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3808 = tensor.empty() : tensor<1x64xf32>
+    %3809 = "ttir.relu"(%3807, %3808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3810 = tensor.empty() : tensor<1x64xf32>
+    %3811 = "ttir.relu"(%3809, %3810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3812 = tensor.empty() : tensor<1x64xf32>
+    %3813 = "ttir.relu"(%3811, %3812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3814 = tensor.empty() : tensor<1x64xf32>
+    %3815 = "ttir.relu"(%3813, %3814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3816 = tensor.empty() : tensor<1x64xf32>
+    %3817 = "ttir.relu"(%3815, %3816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3818 = tensor.empty() : tensor<1x64xf32>
+    %3819 = "ttir.relu"(%3817, %3818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3820 = tensor.empty() : tensor<1x64xf32>
+    %3821 = "ttir.relu"(%3819, %3820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3822 = tensor.empty() : tensor<1x64xf32>
+    %3823 = "ttir.relu"(%3821, %3822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3824 = tensor.empty() : tensor<1x64xf32>
+    %3825 = "ttir.relu"(%3823, %3824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3826 = tensor.empty() : tensor<1x64xf32>
+    %3827 = "ttir.relu"(%3825, %3826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3828 = tensor.empty() : tensor<1x64xf32>
+    %3829 = "ttir.relu"(%3827, %3828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3830 = tensor.empty() : tensor<1x64xf32>
+    %3831 = "ttir.relu"(%3829, %3830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3832 = tensor.empty() : tensor<1x64xf32>
+    %3833 = "ttir.relu"(%3831, %3832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3834 = tensor.empty() : tensor<1x64xf32>
+    %3835 = "ttir.relu"(%3833, %3834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3836 = tensor.empty() : tensor<1x64xf32>
+    %3837 = "ttir.relu"(%3835, %3836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3838 = tensor.empty() : tensor<1x64xf32>
+    %3839 = "ttir.relu"(%3837, %3838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3840 = tensor.empty() : tensor<1x64xf32>
+    %3841 = "ttir.relu"(%3839, %3840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3842 = tensor.empty() : tensor<1x64xf32>
+    %3843 = "ttir.relu"(%3841, %3842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3844 = tensor.empty() : tensor<1x64xf32>
+    %3845 = "ttir.relu"(%3843, %3844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3846 = tensor.empty() : tensor<1x64xf32>
+    %3847 = "ttir.relu"(%3845, %3846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3848 = tensor.empty() : tensor<1x64xf32>
+    %3849 = "ttir.relu"(%3847, %3848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3850 = tensor.empty() : tensor<1x64xf32>
+    %3851 = "ttir.relu"(%3849, %3850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3852 = tensor.empty() : tensor<1x64xf32>
+    %3853 = "ttir.relu"(%3851, %3852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3854 = tensor.empty() : tensor<1x64xf32>
+    %3855 = "ttir.relu"(%3853, %3854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3856 = tensor.empty() : tensor<1x64xf32>
+    %3857 = "ttir.relu"(%3855, %3856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3858 = tensor.empty() : tensor<1x64xf32>
+    %3859 = "ttir.relu"(%3857, %3858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3860 = tensor.empty() : tensor<1x64xf32>
+    %3861 = "ttir.relu"(%3859, %3860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3862 = tensor.empty() : tensor<1x64xf32>
+    %3863 = "ttir.relu"(%3861, %3862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3864 = tensor.empty() : tensor<1x64xf32>
+    %3865 = "ttir.relu"(%3863, %3864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3866 = tensor.empty() : tensor<1x64xf32>
+    %3867 = "ttir.relu"(%3865, %3866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3868 = tensor.empty() : tensor<1x64xf32>
+    %3869 = "ttir.relu"(%3867, %3868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3870 = tensor.empty() : tensor<1x64xf32>
+    %3871 = "ttir.relu"(%3869, %3870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3872 = tensor.empty() : tensor<1x64xf32>
+    %3873 = "ttir.relu"(%3871, %3872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3874 = tensor.empty() : tensor<1x64xf32>
+    %3875 = "ttir.relu"(%3873, %3874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3876 = tensor.empty() : tensor<1x64xf32>
+    %3877 = "ttir.relu"(%3875, %3876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3878 = tensor.empty() : tensor<1x64xf32>
+    %3879 = "ttir.relu"(%3877, %3878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3880 = tensor.empty() : tensor<1x64xf32>
+    %3881 = "ttir.relu"(%3879, %3880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3882 = tensor.empty() : tensor<1x64xf32>
+    %3883 = "ttir.relu"(%3881, %3882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3884 = tensor.empty() : tensor<1x64xf32>
+    %3885 = "ttir.relu"(%3883, %3884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3886 = tensor.empty() : tensor<1x64xf32>
+    %3887 = "ttir.relu"(%3885, %3886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3888 = tensor.empty() : tensor<1x64xf32>
+    %3889 = "ttir.relu"(%3887, %3888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3890 = tensor.empty() : tensor<1x64xf32>
+    %3891 = "ttir.relu"(%3889, %3890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3892 = tensor.empty() : tensor<1x64xf32>
+    %3893 = "ttir.relu"(%3891, %3892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3894 = tensor.empty() : tensor<1x64xf32>
+    %3895 = "ttir.relu"(%3893, %3894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3896 = tensor.empty() : tensor<1x64xf32>
+    %3897 = "ttir.relu"(%3895, %3896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3898 = tensor.empty() : tensor<1x64xf32>
+    %3899 = "ttir.relu"(%3897, %3898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3900 = tensor.empty() : tensor<1x64xf32>
+    %3901 = "ttir.relu"(%3899, %3900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3902 = tensor.empty() : tensor<1x64xf32>
+    %3903 = "ttir.relu"(%3901, %3902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3904 = tensor.empty() : tensor<1x64xf32>
+    %3905 = "ttir.relu"(%3903, %3904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3906 = tensor.empty() : tensor<1x64xf32>
+    %3907 = "ttir.relu"(%3905, %3906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3908 = tensor.empty() : tensor<1x64xf32>
+    %3909 = "ttir.relu"(%3907, %3908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3910 = tensor.empty() : tensor<1x64xf32>
+    %3911 = "ttir.relu"(%3909, %3910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3912 = tensor.empty() : tensor<1x64xf32>
+    %3913 = "ttir.relu"(%3911, %3912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3914 = tensor.empty() : tensor<1x64xf32>
+    %3915 = "ttir.relu"(%3913, %3914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3916 = tensor.empty() : tensor<1x64xf32>
+    %3917 = "ttir.relu"(%3915, %3916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3918 = tensor.empty() : tensor<1x64xf32>
+    %3919 = "ttir.relu"(%3917, %3918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3920 = tensor.empty() : tensor<1x64xf32>
+    %3921 = "ttir.relu"(%3919, %3920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3922 = tensor.empty() : tensor<1x64xf32>
+    %3923 = "ttir.relu"(%3921, %3922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3924 = tensor.empty() : tensor<1x64xf32>
+    %3925 = "ttir.relu"(%3923, %3924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3926 = tensor.empty() : tensor<1x64xf32>
+    %3927 = "ttir.relu"(%3925, %3926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3928 = tensor.empty() : tensor<1x64xf32>
+    %3929 = "ttir.relu"(%3927, %3928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3930 = tensor.empty() : tensor<1x64xf32>
+    %3931 = "ttir.relu"(%3929, %3930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3932 = tensor.empty() : tensor<1x64xf32>
+    %3933 = "ttir.relu"(%3931, %3932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3934 = tensor.empty() : tensor<1x64xf32>
+    %3935 = "ttir.relu"(%3933, %3934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3936 = tensor.empty() : tensor<1x64xf32>
+    %3937 = "ttir.relu"(%3935, %3936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3938 = tensor.empty() : tensor<1x64xf32>
+    %3939 = "ttir.relu"(%3937, %3938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3940 = tensor.empty() : tensor<1x64xf32>
+    %3941 = "ttir.relu"(%3939, %3940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3942 = tensor.empty() : tensor<1x64xf32>
+    %3943 = "ttir.relu"(%3941, %3942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3944 = tensor.empty() : tensor<1x64xf32>
+    %3945 = "ttir.relu"(%3943, %3944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3946 = tensor.empty() : tensor<1x64xf32>
+    %3947 = "ttir.relu"(%3945, %3946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3948 = tensor.empty() : tensor<1x64xf32>
+    %3949 = "ttir.relu"(%3947, %3948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3950 = tensor.empty() : tensor<1x64xf32>
+    %3951 = "ttir.relu"(%3949, %3950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3952 = tensor.empty() : tensor<1x64xf32>
+    %3953 = "ttir.relu"(%3951, %3952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3954 = tensor.empty() : tensor<1x64xf32>
+    %3955 = "ttir.relu"(%3953, %3954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3956 = tensor.empty() : tensor<1x64xf32>
+    %3957 = "ttir.relu"(%3955, %3956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3958 = tensor.empty() : tensor<1x64xf32>
+    %3959 = "ttir.relu"(%3957, %3958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3960 = tensor.empty() : tensor<1x64xf32>
+    %3961 = "ttir.relu"(%3959, %3960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3962 = tensor.empty() : tensor<1x64xf32>
+    %3963 = "ttir.relu"(%3961, %3962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3964 = tensor.empty() : tensor<1x64xf32>
+    %3965 = "ttir.relu"(%3963, %3964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3966 = tensor.empty() : tensor<1x64xf32>
+    %3967 = "ttir.relu"(%3965, %3966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3968 = tensor.empty() : tensor<1x64xf32>
+    %3969 = "ttir.relu"(%3967, %3968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3970 = tensor.empty() : tensor<1x64xf32>
+    %3971 = "ttir.relu"(%3969, %3970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3972 = tensor.empty() : tensor<1x64xf32>
+    %3973 = "ttir.relu"(%3971, %3972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3974 = tensor.empty() : tensor<1x64xf32>
+    %3975 = "ttir.relu"(%3973, %3974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3976 = tensor.empty() : tensor<1x64xf32>
+    %3977 = "ttir.relu"(%3975, %3976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3978 = tensor.empty() : tensor<1x64xf32>
+    %3979 = "ttir.relu"(%3977, %3978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3980 = tensor.empty() : tensor<1x64xf32>
+    %3981 = "ttir.relu"(%3979, %3980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3982 = tensor.empty() : tensor<1x64xf32>
+    %3983 = "ttir.relu"(%3981, %3982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3984 = tensor.empty() : tensor<1x64xf32>
+    %3985 = "ttir.relu"(%3983, %3984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3986 = tensor.empty() : tensor<1x64xf32>
+    %3987 = "ttir.relu"(%3985, %3986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3988 = tensor.empty() : tensor<1x64xf32>
+    %3989 = "ttir.relu"(%3987, %3988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3990 = tensor.empty() : tensor<1x64xf32>
+    %3991 = "ttir.relu"(%3989, %3990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3992 = tensor.empty() : tensor<1x64xf32>
+    %3993 = "ttir.relu"(%3991, %3992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3994 = tensor.empty() : tensor<1x64xf32>
+    %3995 = "ttir.relu"(%3993, %3994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3996 = tensor.empty() : tensor<1x64xf32>
+    %3997 = "ttir.relu"(%3995, %3996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %3998 = tensor.empty() : tensor<1x64xf32>
+    %3999 = "ttir.relu"(%3997, %3998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4000 = tensor.empty() : tensor<1x64xf32>
+    %4001 = "ttir.relu"(%3999, %4000) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4002 = tensor.empty() : tensor<1x64xf32>
+    %4003 = "ttir.relu"(%4001, %4002) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4004 = tensor.empty() : tensor<1x64xf32>
+    %4005 = "ttir.relu"(%4003, %4004) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4006 = tensor.empty() : tensor<1x64xf32>
+    %4007 = "ttir.relu"(%4005, %4006) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4008 = tensor.empty() : tensor<1x64xf32>
+    %4009 = "ttir.relu"(%4007, %4008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4010 = tensor.empty() : tensor<1x64xf32>
+    %4011 = "ttir.relu"(%4009, %4010) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4012 = tensor.empty() : tensor<1x64xf32>
+    %4013 = "ttir.relu"(%4011, %4012) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4014 = tensor.empty() : tensor<1x64xf32>
+    %4015 = "ttir.relu"(%4013, %4014) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4016 = tensor.empty() : tensor<1x64xf32>
+    %4017 = "ttir.relu"(%4015, %4016) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4018 = tensor.empty() : tensor<1x64xf32>
+    %4019 = "ttir.relu"(%4017, %4018) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4020 = tensor.empty() : tensor<1x64xf32>
+    %4021 = "ttir.relu"(%4019, %4020) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4022 = tensor.empty() : tensor<1x64xf32>
+    %4023 = "ttir.relu"(%4021, %4022) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4024 = tensor.empty() : tensor<1x64xf32>
+    %4025 = "ttir.relu"(%4023, %4024) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4026 = tensor.empty() : tensor<1x64xf32>
+    %4027 = "ttir.relu"(%4025, %4026) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4028 = tensor.empty() : tensor<1x64xf32>
+    %4029 = "ttir.relu"(%4027, %4028) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4030 = tensor.empty() : tensor<1x64xf32>
+    %4031 = "ttir.relu"(%4029, %4030) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4032 = tensor.empty() : tensor<1x64xf32>
+    %4033 = "ttir.relu"(%4031, %4032) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4034 = tensor.empty() : tensor<1x64xf32>
+    %4035 = "ttir.relu"(%4033, %4034) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4036 = tensor.empty() : tensor<1x64xf32>
+    %4037 = "ttir.relu"(%4035, %4036) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4038 = tensor.empty() : tensor<1x64xf32>
+    %4039 = "ttir.relu"(%4037, %4038) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4040 = tensor.empty() : tensor<1x64xf32>
+    %4041 = "ttir.relu"(%4039, %4040) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4042 = tensor.empty() : tensor<1x64xf32>
+    %4043 = "ttir.relu"(%4041, %4042) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4044 = tensor.empty() : tensor<1x64xf32>
+    %4045 = "ttir.relu"(%4043, %4044) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4046 = tensor.empty() : tensor<1x64xf32>
+    %4047 = "ttir.relu"(%4045, %4046) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4048 = tensor.empty() : tensor<1x64xf32>
+    %4049 = "ttir.relu"(%4047, %4048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4050 = tensor.empty() : tensor<1x64xf32>
+    %4051 = "ttir.relu"(%4049, %4050) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4052 = tensor.empty() : tensor<1x64xf32>
+    %4053 = "ttir.relu"(%4051, %4052) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4054 = tensor.empty() : tensor<1x64xf32>
+    %4055 = "ttir.relu"(%4053, %4054) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4056 = tensor.empty() : tensor<1x64xf32>
+    %4057 = "ttir.relu"(%4055, %4056) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4058 = tensor.empty() : tensor<1x64xf32>
+    %4059 = "ttir.relu"(%4057, %4058) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4060 = tensor.empty() : tensor<1x64xf32>
+    %4061 = "ttir.relu"(%4059, %4060) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4062 = tensor.empty() : tensor<1x64xf32>
+    %4063 = "ttir.relu"(%4061, %4062) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4064 = tensor.empty() : tensor<1x64xf32>
+    %4065 = "ttir.relu"(%4063, %4064) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4066 = tensor.empty() : tensor<1x64xf32>
+    %4067 = "ttir.relu"(%4065, %4066) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4068 = tensor.empty() : tensor<1x64xf32>
+    %4069 = "ttir.relu"(%4067, %4068) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4070 = tensor.empty() : tensor<1x64xf32>
+    %4071 = "ttir.relu"(%4069, %4070) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4072 = tensor.empty() : tensor<1x64xf32>
+    %4073 = "ttir.relu"(%4071, %4072) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4074 = tensor.empty() : tensor<1x64xf32>
+    %4075 = "ttir.relu"(%4073, %4074) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4076 = tensor.empty() : tensor<1x64xf32>
+    %4077 = "ttir.relu"(%4075, %4076) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4078 = tensor.empty() : tensor<1x64xf32>
+    %4079 = "ttir.relu"(%4077, %4078) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4080 = tensor.empty() : tensor<1x64xf32>
+    %4081 = "ttir.relu"(%4079, %4080) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4082 = tensor.empty() : tensor<1x64xf32>
+    %4083 = "ttir.relu"(%4081, %4082) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4084 = tensor.empty() : tensor<1x64xf32>
+    %4085 = "ttir.relu"(%4083, %4084) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4086 = tensor.empty() : tensor<1x64xf32>
+    %4087 = "ttir.relu"(%4085, %4086) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4088 = tensor.empty() : tensor<1x64xf32>
+    %4089 = "ttir.relu"(%4087, %4088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4090 = tensor.empty() : tensor<1x64xf32>
+    %4091 = "ttir.relu"(%4089, %4090) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4092 = tensor.empty() : tensor<1x64xf32>
+    %4093 = "ttir.relu"(%4091, %4092) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4094 = tensor.empty() : tensor<1x64xf32>
+    %4095 = "ttir.relu"(%4093, %4094) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4096 = tensor.empty() : tensor<1x64xf32>
+    %4097 = "ttir.relu"(%4095, %4096) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4098 = tensor.empty() : tensor<1x64xf32>
+    %4099 = "ttir.relu"(%4097, %4098) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4100 = tensor.empty() : tensor<1x64xf32>
+    %4101 = "ttir.relu"(%4099, %4100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4102 = tensor.empty() : tensor<1x64xf32>
+    %4103 = "ttir.relu"(%4101, %4102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4104 = tensor.empty() : tensor<1x64xf32>
+    %4105 = "ttir.relu"(%4103, %4104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4106 = tensor.empty() : tensor<1x64xf32>
+    %4107 = "ttir.relu"(%4105, %4106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4108 = tensor.empty() : tensor<1x64xf32>
+    %4109 = "ttir.relu"(%4107, %4108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4110 = tensor.empty() : tensor<1x64xf32>
+    %4111 = "ttir.relu"(%4109, %4110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4112 = tensor.empty() : tensor<1x64xf32>
+    %4113 = "ttir.relu"(%4111, %4112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4114 = tensor.empty() : tensor<1x64xf32>
+    %4115 = "ttir.relu"(%4113, %4114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4116 = tensor.empty() : tensor<1x64xf32>
+    %4117 = "ttir.relu"(%4115, %4116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4118 = tensor.empty() : tensor<1x64xf32>
+    %4119 = "ttir.relu"(%4117, %4118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4120 = tensor.empty() : tensor<1x64xf32>
+    %4121 = "ttir.relu"(%4119, %4120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4122 = tensor.empty() : tensor<1x64xf32>
+    %4123 = "ttir.relu"(%4121, %4122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4124 = tensor.empty() : tensor<1x64xf32>
+    %4125 = "ttir.relu"(%4123, %4124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4126 = tensor.empty() : tensor<1x64xf32>
+    %4127 = "ttir.relu"(%4125, %4126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4128 = tensor.empty() : tensor<1x64xf32>
+    %4129 = "ttir.relu"(%4127, %4128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4130 = tensor.empty() : tensor<1x64xf32>
+    %4131 = "ttir.relu"(%4129, %4130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4132 = tensor.empty() : tensor<1x64xf32>
+    %4133 = "ttir.relu"(%4131, %4132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4134 = tensor.empty() : tensor<1x64xf32>
+    %4135 = "ttir.relu"(%4133, %4134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4136 = tensor.empty() : tensor<1x64xf32>
+    %4137 = "ttir.relu"(%4135, %4136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4138 = tensor.empty() : tensor<1x64xf32>
+    %4139 = "ttir.relu"(%4137, %4138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4140 = tensor.empty() : tensor<1x64xf32>
+    %4141 = "ttir.relu"(%4139, %4140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4142 = tensor.empty() : tensor<1x64xf32>
+    %4143 = "ttir.relu"(%4141, %4142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4144 = tensor.empty() : tensor<1x64xf32>
+    %4145 = "ttir.relu"(%4143, %4144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4146 = tensor.empty() : tensor<1x64xf32>
+    %4147 = "ttir.relu"(%4145, %4146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4148 = tensor.empty() : tensor<1x64xf32>
+    %4149 = "ttir.relu"(%4147, %4148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4150 = tensor.empty() : tensor<1x64xf32>
+    %4151 = "ttir.relu"(%4149, %4150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4152 = tensor.empty() : tensor<1x64xf32>
+    %4153 = "ttir.relu"(%4151, %4152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4154 = tensor.empty() : tensor<1x64xf32>
+    %4155 = "ttir.relu"(%4153, %4154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4156 = tensor.empty() : tensor<1x64xf32>
+    %4157 = "ttir.relu"(%4155, %4156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4158 = tensor.empty() : tensor<1x64xf32>
+    %4159 = "ttir.relu"(%4157, %4158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4160 = tensor.empty() : tensor<1x64xf32>
+    %4161 = "ttir.relu"(%4159, %4160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4162 = tensor.empty() : tensor<1x64xf32>
+    %4163 = "ttir.relu"(%4161, %4162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4164 = tensor.empty() : tensor<1x64xf32>
+    %4165 = "ttir.relu"(%4163, %4164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4166 = tensor.empty() : tensor<1x64xf32>
+    %4167 = "ttir.relu"(%4165, %4166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4168 = tensor.empty() : tensor<1x64xf32>
+    %4169 = "ttir.relu"(%4167, %4168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4170 = tensor.empty() : tensor<1x64xf32>
+    %4171 = "ttir.relu"(%4169, %4170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4172 = tensor.empty() : tensor<1x64xf32>
+    %4173 = "ttir.relu"(%4171, %4172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4174 = tensor.empty() : tensor<1x64xf32>
+    %4175 = "ttir.relu"(%4173, %4174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4176 = tensor.empty() : tensor<1x64xf32>
+    %4177 = "ttir.relu"(%4175, %4176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4178 = tensor.empty() : tensor<1x64xf32>
+    %4179 = "ttir.relu"(%4177, %4178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4180 = tensor.empty() : tensor<1x64xf32>
+    %4181 = "ttir.relu"(%4179, %4180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4182 = tensor.empty() : tensor<1x64xf32>
+    %4183 = "ttir.relu"(%4181, %4182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4184 = tensor.empty() : tensor<1x64xf32>
+    %4185 = "ttir.relu"(%4183, %4184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4186 = tensor.empty() : tensor<1x64xf32>
+    %4187 = "ttir.relu"(%4185, %4186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4188 = tensor.empty() : tensor<1x64xf32>
+    %4189 = "ttir.relu"(%4187, %4188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4190 = tensor.empty() : tensor<1x64xf32>
+    %4191 = "ttir.relu"(%4189, %4190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4192 = tensor.empty() : tensor<1x64xf32>
+    %4193 = "ttir.relu"(%4191, %4192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4194 = tensor.empty() : tensor<1x64xf32>
+    %4195 = "ttir.relu"(%4193, %4194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4196 = tensor.empty() : tensor<1x64xf32>
+    %4197 = "ttir.relu"(%4195, %4196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4198 = tensor.empty() : tensor<1x64xf32>
+    %4199 = "ttir.relu"(%4197, %4198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4200 = tensor.empty() : tensor<1x64xf32>
+    %4201 = "ttir.relu"(%4199, %4200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4202 = tensor.empty() : tensor<1x64xf32>
+    %4203 = "ttir.relu"(%4201, %4202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4204 = tensor.empty() : tensor<1x64xf32>
+    %4205 = "ttir.relu"(%4203, %4204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4206 = tensor.empty() : tensor<1x64xf32>
+    %4207 = "ttir.relu"(%4205, %4206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4208 = tensor.empty() : tensor<1x64xf32>
+    %4209 = "ttir.relu"(%4207, %4208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4210 = tensor.empty() : tensor<1x64xf32>
+    %4211 = "ttir.relu"(%4209, %4210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4212 = tensor.empty() : tensor<1x64xf32>
+    %4213 = "ttir.relu"(%4211, %4212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4214 = tensor.empty() : tensor<1x64xf32>
+    %4215 = "ttir.relu"(%4213, %4214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4216 = tensor.empty() : tensor<1x64xf32>
+    %4217 = "ttir.relu"(%4215, %4216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4218 = tensor.empty() : tensor<1x64xf32>
+    %4219 = "ttir.relu"(%4217, %4218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4220 = tensor.empty() : tensor<1x64xf32>
+    %4221 = "ttir.relu"(%4219, %4220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4222 = tensor.empty() : tensor<1x64xf32>
+    %4223 = "ttir.relu"(%4221, %4222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4224 = tensor.empty() : tensor<1x64xf32>
+    %4225 = "ttir.relu"(%4223, %4224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4226 = tensor.empty() : tensor<1x64xf32>
+    %4227 = "ttir.relu"(%4225, %4226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4228 = tensor.empty() : tensor<1x64xf32>
+    %4229 = "ttir.relu"(%4227, %4228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4230 = tensor.empty() : tensor<1x64xf32>
+    %4231 = "ttir.relu"(%4229, %4230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4232 = tensor.empty() : tensor<1x64xf32>
+    %4233 = "ttir.relu"(%4231, %4232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4234 = tensor.empty() : tensor<1x64xf32>
+    %4235 = "ttir.relu"(%4233, %4234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4236 = tensor.empty() : tensor<1x64xf32>
+    %4237 = "ttir.relu"(%4235, %4236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4238 = tensor.empty() : tensor<1x64xf32>
+    %4239 = "ttir.relu"(%4237, %4238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4240 = tensor.empty() : tensor<1x64xf32>
+    %4241 = "ttir.relu"(%4239, %4240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4242 = tensor.empty() : tensor<1x64xf32>
+    %4243 = "ttir.relu"(%4241, %4242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4244 = tensor.empty() : tensor<1x64xf32>
+    %4245 = "ttir.relu"(%4243, %4244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4246 = tensor.empty() : tensor<1x64xf32>
+    %4247 = "ttir.relu"(%4245, %4246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4248 = tensor.empty() : tensor<1x64xf32>
+    %4249 = "ttir.relu"(%4247, %4248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4250 = tensor.empty() : tensor<1x64xf32>
+    %4251 = "ttir.relu"(%4249, %4250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4252 = tensor.empty() : tensor<1x64xf32>
+    %4253 = "ttir.relu"(%4251, %4252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4254 = tensor.empty() : tensor<1x64xf32>
+    %4255 = "ttir.relu"(%4253, %4254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4256 = tensor.empty() : tensor<1x64xf32>
+    %4257 = "ttir.relu"(%4255, %4256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4258 = tensor.empty() : tensor<1x64xf32>
+    %4259 = "ttir.relu"(%4257, %4258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4260 = tensor.empty() : tensor<1x64xf32>
+    %4261 = "ttir.relu"(%4259, %4260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4262 = tensor.empty() : tensor<1x64xf32>
+    %4263 = "ttir.relu"(%4261, %4262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4264 = tensor.empty() : tensor<1x64xf32>
+    %4265 = "ttir.relu"(%4263, %4264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4266 = tensor.empty() : tensor<1x64xf32>
+    %4267 = "ttir.relu"(%4265, %4266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4268 = tensor.empty() : tensor<1x64xf32>
+    %4269 = "ttir.relu"(%4267, %4268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4270 = tensor.empty() : tensor<1x64xf32>
+    %4271 = "ttir.relu"(%4269, %4270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4272 = tensor.empty() : tensor<1x64xf32>
+    %4273 = "ttir.relu"(%4271, %4272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4274 = tensor.empty() : tensor<1x64xf32>
+    %4275 = "ttir.relu"(%4273, %4274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4276 = tensor.empty() : tensor<1x64xf32>
+    %4277 = "ttir.relu"(%4275, %4276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4278 = tensor.empty() : tensor<1x64xf32>
+    %4279 = "ttir.relu"(%4277, %4278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4280 = tensor.empty() : tensor<1x64xf32>
+    %4281 = "ttir.relu"(%4279, %4280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4282 = tensor.empty() : tensor<1x64xf32>
+    %4283 = "ttir.relu"(%4281, %4282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4284 = tensor.empty() : tensor<1x64xf32>
+    %4285 = "ttir.relu"(%4283, %4284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4286 = tensor.empty() : tensor<1x64xf32>
+    %4287 = "ttir.relu"(%4285, %4286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4288 = tensor.empty() : tensor<1x64xf32>
+    %4289 = "ttir.relu"(%4287, %4288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4290 = tensor.empty() : tensor<1x64xf32>
+    %4291 = "ttir.relu"(%4289, %4290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4292 = tensor.empty() : tensor<1x64xf32>
+    %4293 = "ttir.relu"(%4291, %4292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4294 = tensor.empty() : tensor<1x64xf32>
+    %4295 = "ttir.relu"(%4293, %4294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4296 = tensor.empty() : tensor<1x64xf32>
+    %4297 = "ttir.relu"(%4295, %4296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4298 = tensor.empty() : tensor<1x64xf32>
+    %4299 = "ttir.relu"(%4297, %4298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4300 = tensor.empty() : tensor<1x64xf32>
+    %4301 = "ttir.relu"(%4299, %4300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4302 = tensor.empty() : tensor<1x64xf32>
+    %4303 = "ttir.relu"(%4301, %4302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4304 = tensor.empty() : tensor<1x64xf32>
+    %4305 = "ttir.relu"(%4303, %4304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4306 = tensor.empty() : tensor<1x64xf32>
+    %4307 = "ttir.relu"(%4305, %4306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4308 = tensor.empty() : tensor<1x64xf32>
+    %4309 = "ttir.relu"(%4307, %4308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4310 = tensor.empty() : tensor<1x64xf32>
+    %4311 = "ttir.relu"(%4309, %4310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4312 = tensor.empty() : tensor<1x64xf32>
+    %4313 = "ttir.relu"(%4311, %4312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4314 = tensor.empty() : tensor<1x64xf32>
+    %4315 = "ttir.relu"(%4313, %4314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4316 = tensor.empty() : tensor<1x64xf32>
+    %4317 = "ttir.relu"(%4315, %4316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4318 = tensor.empty() : tensor<1x64xf32>
+    %4319 = "ttir.relu"(%4317, %4318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4320 = tensor.empty() : tensor<1x64xf32>
+    %4321 = "ttir.relu"(%4319, %4320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4322 = tensor.empty() : tensor<1x64xf32>
+    %4323 = "ttir.relu"(%4321, %4322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4324 = tensor.empty() : tensor<1x64xf32>
+    %4325 = "ttir.relu"(%4323, %4324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4326 = tensor.empty() : tensor<1x64xf32>
+    %4327 = "ttir.relu"(%4325, %4326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4328 = tensor.empty() : tensor<1x64xf32>
+    %4329 = "ttir.relu"(%4327, %4328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4330 = tensor.empty() : tensor<1x64xf32>
+    %4331 = "ttir.relu"(%4329, %4330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4332 = tensor.empty() : tensor<1x64xf32>
+    %4333 = "ttir.relu"(%4331, %4332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4334 = tensor.empty() : tensor<1x64xf32>
+    %4335 = "ttir.relu"(%4333, %4334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4336 = tensor.empty() : tensor<1x64xf32>
+    %4337 = "ttir.relu"(%4335, %4336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4338 = tensor.empty() : tensor<1x64xf32>
+    %4339 = "ttir.relu"(%4337, %4338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4340 = tensor.empty() : tensor<1x64xf32>
+    %4341 = "ttir.relu"(%4339, %4340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4342 = tensor.empty() : tensor<1x64xf32>
+    %4343 = "ttir.relu"(%4341, %4342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4344 = tensor.empty() : tensor<1x64xf32>
+    %4345 = "ttir.relu"(%4343, %4344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4346 = tensor.empty() : tensor<1x64xf32>
+    %4347 = "ttir.relu"(%4345, %4346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4348 = tensor.empty() : tensor<1x64xf32>
+    %4349 = "ttir.relu"(%4347, %4348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4350 = tensor.empty() : tensor<1x64xf32>
+    %4351 = "ttir.relu"(%4349, %4350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4352 = tensor.empty() : tensor<1x64xf32>
+    %4353 = "ttir.relu"(%4351, %4352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4354 = tensor.empty() : tensor<1x64xf32>
+    %4355 = "ttir.relu"(%4353, %4354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4356 = tensor.empty() : tensor<1x64xf32>
+    %4357 = "ttir.relu"(%4355, %4356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4358 = tensor.empty() : tensor<1x64xf32>
+    %4359 = "ttir.relu"(%4357, %4358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4360 = tensor.empty() : tensor<1x64xf32>
+    %4361 = "ttir.relu"(%4359, %4360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4362 = tensor.empty() : tensor<1x64xf32>
+    %4363 = "ttir.relu"(%4361, %4362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4364 = tensor.empty() : tensor<1x64xf32>
+    %4365 = "ttir.relu"(%4363, %4364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4366 = tensor.empty() : tensor<1x64xf32>
+    %4367 = "ttir.relu"(%4365, %4366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4368 = tensor.empty() : tensor<1x64xf32>
+    %4369 = "ttir.relu"(%4367, %4368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4370 = tensor.empty() : tensor<1x64xf32>
+    %4371 = "ttir.relu"(%4369, %4370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4372 = tensor.empty() : tensor<1x64xf32>
+    %4373 = "ttir.relu"(%4371, %4372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4374 = tensor.empty() : tensor<1x64xf32>
+    %4375 = "ttir.relu"(%4373, %4374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4376 = tensor.empty() : tensor<1x64xf32>
+    %4377 = "ttir.relu"(%4375, %4376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4378 = tensor.empty() : tensor<1x64xf32>
+    %4379 = "ttir.relu"(%4377, %4378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4380 = tensor.empty() : tensor<1x64xf32>
+    %4381 = "ttir.relu"(%4379, %4380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4382 = tensor.empty() : tensor<1x64xf32>
+    %4383 = "ttir.relu"(%4381, %4382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4384 = tensor.empty() : tensor<1x64xf32>
+    %4385 = "ttir.relu"(%4383, %4384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4386 = tensor.empty() : tensor<1x64xf32>
+    %4387 = "ttir.relu"(%4385, %4386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4388 = tensor.empty() : tensor<1x64xf32>
+    %4389 = "ttir.relu"(%4387, %4388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4390 = tensor.empty() : tensor<1x64xf32>
+    %4391 = "ttir.relu"(%4389, %4390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4392 = tensor.empty() : tensor<1x64xf32>
+    %4393 = "ttir.relu"(%4391, %4392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4394 = tensor.empty() : tensor<1x64xf32>
+    %4395 = "ttir.relu"(%4393, %4394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4396 = tensor.empty() : tensor<1x64xf32>
+    %4397 = "ttir.relu"(%4395, %4396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4398 = tensor.empty() : tensor<1x64xf32>
+    %4399 = "ttir.relu"(%4397, %4398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4400 = tensor.empty() : tensor<1x64xf32>
+    %4401 = "ttir.relu"(%4399, %4400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4402 = tensor.empty() : tensor<1x64xf32>
+    %4403 = "ttir.relu"(%4401, %4402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4404 = tensor.empty() : tensor<1x64xf32>
+    %4405 = "ttir.relu"(%4403, %4404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4406 = tensor.empty() : tensor<1x64xf32>
+    %4407 = "ttir.relu"(%4405, %4406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4408 = tensor.empty() : tensor<1x64xf32>
+    %4409 = "ttir.relu"(%4407, %4408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4410 = tensor.empty() : tensor<1x64xf32>
+    %4411 = "ttir.relu"(%4409, %4410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4412 = tensor.empty() : tensor<1x64xf32>
+    %4413 = "ttir.relu"(%4411, %4412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4414 = tensor.empty() : tensor<1x64xf32>
+    %4415 = "ttir.relu"(%4413, %4414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4416 = tensor.empty() : tensor<1x64xf32>
+    %4417 = "ttir.relu"(%4415, %4416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4418 = tensor.empty() : tensor<1x64xf32>
+    %4419 = "ttir.relu"(%4417, %4418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4420 = tensor.empty() : tensor<1x64xf32>
+    %4421 = "ttir.relu"(%4419, %4420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4422 = tensor.empty() : tensor<1x64xf32>
+    %4423 = "ttir.relu"(%4421, %4422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4424 = tensor.empty() : tensor<1x64xf32>
+    %4425 = "ttir.relu"(%4423, %4424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4426 = tensor.empty() : tensor<1x64xf32>
+    %4427 = "ttir.relu"(%4425, %4426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4428 = tensor.empty() : tensor<1x64xf32>
+    %4429 = "ttir.relu"(%4427, %4428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4430 = tensor.empty() : tensor<1x64xf32>
+    %4431 = "ttir.relu"(%4429, %4430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4432 = tensor.empty() : tensor<1x64xf32>
+    %4433 = "ttir.relu"(%4431, %4432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4434 = tensor.empty() : tensor<1x64xf32>
+    %4435 = "ttir.relu"(%4433, %4434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4436 = tensor.empty() : tensor<1x64xf32>
+    %4437 = "ttir.relu"(%4435, %4436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4438 = tensor.empty() : tensor<1x64xf32>
+    %4439 = "ttir.relu"(%4437, %4438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4440 = tensor.empty() : tensor<1x64xf32>
+    %4441 = "ttir.relu"(%4439, %4440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4442 = tensor.empty() : tensor<1x64xf32>
+    %4443 = "ttir.relu"(%4441, %4442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4444 = tensor.empty() : tensor<1x64xf32>
+    %4445 = "ttir.relu"(%4443, %4444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4446 = tensor.empty() : tensor<1x64xf32>
+    %4447 = "ttir.relu"(%4445, %4446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4448 = tensor.empty() : tensor<1x64xf32>
+    %4449 = "ttir.relu"(%4447, %4448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4450 = tensor.empty() : tensor<1x64xf32>
+    %4451 = "ttir.relu"(%4449, %4450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4452 = tensor.empty() : tensor<1x64xf32>
+    %4453 = "ttir.relu"(%4451, %4452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4454 = tensor.empty() : tensor<1x64xf32>
+    %4455 = "ttir.relu"(%4453, %4454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4456 = tensor.empty() : tensor<1x64xf32>
+    %4457 = "ttir.relu"(%4455, %4456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4458 = tensor.empty() : tensor<1x64xf32>
+    %4459 = "ttir.relu"(%4457, %4458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4460 = tensor.empty() : tensor<1x64xf32>
+    %4461 = "ttir.relu"(%4459, %4460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4462 = tensor.empty() : tensor<1x64xf32>
+    %4463 = "ttir.relu"(%4461, %4462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4464 = tensor.empty() : tensor<1x64xf32>
+    %4465 = "ttir.relu"(%4463, %4464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4466 = tensor.empty() : tensor<1x64xf32>
+    %4467 = "ttir.relu"(%4465, %4466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4468 = tensor.empty() : tensor<1x64xf32>
+    %4469 = "ttir.relu"(%4467, %4468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4470 = tensor.empty() : tensor<1x64xf32>
+    %4471 = "ttir.relu"(%4469, %4470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4472 = tensor.empty() : tensor<1x64xf32>
+    %4473 = "ttir.relu"(%4471, %4472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4474 = tensor.empty() : tensor<1x64xf32>
+    %4475 = "ttir.relu"(%4473, %4474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4476 = tensor.empty() : tensor<1x64xf32>
+    %4477 = "ttir.relu"(%4475, %4476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4478 = tensor.empty() : tensor<1x64xf32>
+    %4479 = "ttir.relu"(%4477, %4478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4480 = tensor.empty() : tensor<1x64xf32>
+    %4481 = "ttir.relu"(%4479, %4480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4482 = tensor.empty() : tensor<1x64xf32>
+    %4483 = "ttir.relu"(%4481, %4482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4484 = tensor.empty() : tensor<1x64xf32>
+    %4485 = "ttir.relu"(%4483, %4484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4486 = tensor.empty() : tensor<1x64xf32>
+    %4487 = "ttir.relu"(%4485, %4486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4488 = tensor.empty() : tensor<1x64xf32>
+    %4489 = "ttir.relu"(%4487, %4488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4490 = tensor.empty() : tensor<1x64xf32>
+    %4491 = "ttir.relu"(%4489, %4490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4492 = tensor.empty() : tensor<1x64xf32>
+    %4493 = "ttir.relu"(%4491, %4492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4494 = tensor.empty() : tensor<1x64xf32>
+    %4495 = "ttir.relu"(%4493, %4494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4496 = tensor.empty() : tensor<1x64xf32>
+    %4497 = "ttir.relu"(%4495, %4496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4498 = tensor.empty() : tensor<1x64xf32>
+    %4499 = "ttir.relu"(%4497, %4498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4500 = tensor.empty() : tensor<1x64xf32>
+    %4501 = "ttir.relu"(%4499, %4500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4502 = tensor.empty() : tensor<1x64xf32>
+    %4503 = "ttir.relu"(%4501, %4502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4504 = tensor.empty() : tensor<1x64xf32>
+    %4505 = "ttir.relu"(%4503, %4504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4506 = tensor.empty() : tensor<1x64xf32>
+    %4507 = "ttir.relu"(%4505, %4506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4508 = tensor.empty() : tensor<1x64xf32>
+    %4509 = "ttir.relu"(%4507, %4508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4510 = tensor.empty() : tensor<1x64xf32>
+    %4511 = "ttir.relu"(%4509, %4510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4512 = tensor.empty() : tensor<1x64xf32>
+    %4513 = "ttir.relu"(%4511, %4512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4514 = tensor.empty() : tensor<1x64xf32>
+    %4515 = "ttir.relu"(%4513, %4514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4516 = tensor.empty() : tensor<1x64xf32>
+    %4517 = "ttir.relu"(%4515, %4516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4518 = tensor.empty() : tensor<1x64xf32>
+    %4519 = "ttir.relu"(%4517, %4518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4520 = tensor.empty() : tensor<1x64xf32>
+    %4521 = "ttir.relu"(%4519, %4520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4522 = tensor.empty() : tensor<1x64xf32>
+    %4523 = "ttir.relu"(%4521, %4522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4524 = tensor.empty() : tensor<1x64xf32>
+    %4525 = "ttir.relu"(%4523, %4524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4526 = tensor.empty() : tensor<1x64xf32>
+    %4527 = "ttir.relu"(%4525, %4526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4528 = tensor.empty() : tensor<1x64xf32>
+    %4529 = "ttir.relu"(%4527, %4528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4530 = tensor.empty() : tensor<1x64xf32>
+    %4531 = "ttir.relu"(%4529, %4530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4532 = tensor.empty() : tensor<1x64xf32>
+    %4533 = "ttir.relu"(%4531, %4532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4534 = tensor.empty() : tensor<1x64xf32>
+    %4535 = "ttir.relu"(%4533, %4534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4536 = tensor.empty() : tensor<1x64xf32>
+    %4537 = "ttir.relu"(%4535, %4536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4538 = tensor.empty() : tensor<1x64xf32>
+    %4539 = "ttir.relu"(%4537, %4538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4540 = tensor.empty() : tensor<1x64xf32>
+    %4541 = "ttir.relu"(%4539, %4540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4542 = tensor.empty() : tensor<1x64xf32>
+    %4543 = "ttir.relu"(%4541, %4542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4544 = tensor.empty() : tensor<1x64xf32>
+    %4545 = "ttir.relu"(%4543, %4544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4546 = tensor.empty() : tensor<1x64xf32>
+    %4547 = "ttir.relu"(%4545, %4546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4548 = tensor.empty() : tensor<1x64xf32>
+    %4549 = "ttir.relu"(%4547, %4548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4550 = tensor.empty() : tensor<1x64xf32>
+    %4551 = "ttir.relu"(%4549, %4550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4552 = tensor.empty() : tensor<1x64xf32>
+    %4553 = "ttir.relu"(%4551, %4552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4554 = tensor.empty() : tensor<1x64xf32>
+    %4555 = "ttir.relu"(%4553, %4554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4556 = tensor.empty() : tensor<1x64xf32>
+    %4557 = "ttir.relu"(%4555, %4556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4558 = tensor.empty() : tensor<1x64xf32>
+    %4559 = "ttir.relu"(%4557, %4558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4560 = tensor.empty() : tensor<1x64xf32>
+    %4561 = "ttir.relu"(%4559, %4560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4562 = tensor.empty() : tensor<1x64xf32>
+    %4563 = "ttir.relu"(%4561, %4562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4564 = tensor.empty() : tensor<1x64xf32>
+    %4565 = "ttir.relu"(%4563, %4564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4566 = tensor.empty() : tensor<1x64xf32>
+    %4567 = "ttir.relu"(%4565, %4566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4568 = tensor.empty() : tensor<1x64xf32>
+    %4569 = "ttir.relu"(%4567, %4568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4570 = tensor.empty() : tensor<1x64xf32>
+    %4571 = "ttir.relu"(%4569, %4570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4572 = tensor.empty() : tensor<1x64xf32>
+    %4573 = "ttir.relu"(%4571, %4572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4574 = tensor.empty() : tensor<1x64xf32>
+    %4575 = "ttir.relu"(%4573, %4574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4576 = tensor.empty() : tensor<1x64xf32>
+    %4577 = "ttir.relu"(%4575, %4576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4578 = tensor.empty() : tensor<1x64xf32>
+    %4579 = "ttir.relu"(%4577, %4578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4580 = tensor.empty() : tensor<1x64xf32>
+    %4581 = "ttir.relu"(%4579, %4580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4582 = tensor.empty() : tensor<1x64xf32>
+    %4583 = "ttir.relu"(%4581, %4582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4584 = tensor.empty() : tensor<1x64xf32>
+    %4585 = "ttir.relu"(%4583, %4584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4586 = tensor.empty() : tensor<1x64xf32>
+    %4587 = "ttir.relu"(%4585, %4586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4588 = tensor.empty() : tensor<1x64xf32>
+    %4589 = "ttir.relu"(%4587, %4588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4590 = tensor.empty() : tensor<1x64xf32>
+    %4591 = "ttir.relu"(%4589, %4590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4592 = tensor.empty() : tensor<1x64xf32>
+    %4593 = "ttir.relu"(%4591, %4592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4594 = tensor.empty() : tensor<1x64xf32>
+    %4595 = "ttir.relu"(%4593, %4594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4596 = tensor.empty() : tensor<1x64xf32>
+    %4597 = "ttir.relu"(%4595, %4596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4598 = tensor.empty() : tensor<1x64xf32>
+    %4599 = "ttir.relu"(%4597, %4598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4600 = tensor.empty() : tensor<1x64xf32>
+    %4601 = "ttir.relu"(%4599, %4600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4602 = tensor.empty() : tensor<1x64xf32>
+    %4603 = "ttir.relu"(%4601, %4602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4604 = tensor.empty() : tensor<1x64xf32>
+    %4605 = "ttir.relu"(%4603, %4604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4606 = tensor.empty() : tensor<1x64xf32>
+    %4607 = "ttir.relu"(%4605, %4606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4608 = tensor.empty() : tensor<1x64xf32>
+    %4609 = "ttir.relu"(%4607, %4608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4610 = tensor.empty() : tensor<1x64xf32>
+    %4611 = "ttir.relu"(%4609, %4610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4612 = tensor.empty() : tensor<1x64xf32>
+    %4613 = "ttir.relu"(%4611, %4612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4614 = tensor.empty() : tensor<1x64xf32>
+    %4615 = "ttir.relu"(%4613, %4614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4616 = tensor.empty() : tensor<1x64xf32>
+    %4617 = "ttir.relu"(%4615, %4616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4618 = tensor.empty() : tensor<1x64xf32>
+    %4619 = "ttir.relu"(%4617, %4618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4620 = tensor.empty() : tensor<1x64xf32>
+    %4621 = "ttir.relu"(%4619, %4620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4622 = tensor.empty() : tensor<1x64xf32>
+    %4623 = "ttir.relu"(%4621, %4622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4624 = tensor.empty() : tensor<1x64xf32>
+    %4625 = "ttir.relu"(%4623, %4624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4626 = tensor.empty() : tensor<1x64xf32>
+    %4627 = "ttir.relu"(%4625, %4626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4628 = tensor.empty() : tensor<1x64xf32>
+    %4629 = "ttir.relu"(%4627, %4628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4630 = tensor.empty() : tensor<1x64xf32>
+    %4631 = "ttir.relu"(%4629, %4630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4632 = tensor.empty() : tensor<1x64xf32>
+    %4633 = "ttir.relu"(%4631, %4632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4634 = tensor.empty() : tensor<1x64xf32>
+    %4635 = "ttir.relu"(%4633, %4634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4636 = tensor.empty() : tensor<1x64xf32>
+    %4637 = "ttir.relu"(%4635, %4636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4638 = tensor.empty() : tensor<1x64xf32>
+    %4639 = "ttir.relu"(%4637, %4638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4640 = tensor.empty() : tensor<1x64xf32>
+    %4641 = "ttir.relu"(%4639, %4640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4642 = tensor.empty() : tensor<1x64xf32>
+    %4643 = "ttir.relu"(%4641, %4642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4644 = tensor.empty() : tensor<1x64xf32>
+    %4645 = "ttir.relu"(%4643, %4644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4646 = tensor.empty() : tensor<1x64xf32>
+    %4647 = "ttir.relu"(%4645, %4646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4648 = tensor.empty() : tensor<1x64xf32>
+    %4649 = "ttir.relu"(%4647, %4648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4650 = tensor.empty() : tensor<1x64xf32>
+    %4651 = "ttir.relu"(%4649, %4650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4652 = tensor.empty() : tensor<1x64xf32>
+    %4653 = "ttir.relu"(%4651, %4652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4654 = tensor.empty() : tensor<1x64xf32>
+    %4655 = "ttir.relu"(%4653, %4654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4656 = tensor.empty() : tensor<1x64xf32>
+    %4657 = "ttir.relu"(%4655, %4656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4658 = tensor.empty() : tensor<1x64xf32>
+    %4659 = "ttir.relu"(%4657, %4658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4660 = tensor.empty() : tensor<1x64xf32>
+    %4661 = "ttir.relu"(%4659, %4660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4662 = tensor.empty() : tensor<1x64xf32>
+    %4663 = "ttir.relu"(%4661, %4662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4664 = tensor.empty() : tensor<1x64xf32>
+    %4665 = "ttir.relu"(%4663, %4664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4666 = tensor.empty() : tensor<1x64xf32>
+    %4667 = "ttir.relu"(%4665, %4666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4668 = tensor.empty() : tensor<1x64xf32>
+    %4669 = "ttir.relu"(%4667, %4668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4670 = tensor.empty() : tensor<1x64xf32>
+    %4671 = "ttir.relu"(%4669, %4670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4672 = tensor.empty() : tensor<1x64xf32>
+    %4673 = "ttir.relu"(%4671, %4672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4674 = tensor.empty() : tensor<1x64xf32>
+    %4675 = "ttir.relu"(%4673, %4674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4676 = tensor.empty() : tensor<1x64xf32>
+    %4677 = "ttir.relu"(%4675, %4676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4678 = tensor.empty() : tensor<1x64xf32>
+    %4679 = "ttir.relu"(%4677, %4678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4680 = tensor.empty() : tensor<1x64xf32>
+    %4681 = "ttir.relu"(%4679, %4680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4682 = tensor.empty() : tensor<1x64xf32>
+    %4683 = "ttir.relu"(%4681, %4682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4684 = tensor.empty() : tensor<1x64xf32>
+    %4685 = "ttir.relu"(%4683, %4684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4686 = tensor.empty() : tensor<1x64xf32>
+    %4687 = "ttir.relu"(%4685, %4686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4688 = tensor.empty() : tensor<1x64xf32>
+    %4689 = "ttir.relu"(%4687, %4688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4690 = tensor.empty() : tensor<1x64xf32>
+    %4691 = "ttir.relu"(%4689, %4690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4692 = tensor.empty() : tensor<1x64xf32>
+    %4693 = "ttir.relu"(%4691, %4692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4694 = tensor.empty() : tensor<1x64xf32>
+    %4695 = "ttir.relu"(%4693, %4694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4696 = tensor.empty() : tensor<1x64xf32>
+    %4697 = "ttir.relu"(%4695, %4696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4698 = tensor.empty() : tensor<1x64xf32>
+    %4699 = "ttir.relu"(%4697, %4698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4700 = tensor.empty() : tensor<1x64xf32>
+    %4701 = "ttir.relu"(%4699, %4700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4702 = tensor.empty() : tensor<1x64xf32>
+    %4703 = "ttir.relu"(%4701, %4702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4704 = tensor.empty() : tensor<1x64xf32>
+    %4705 = "ttir.relu"(%4703, %4704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4706 = tensor.empty() : tensor<1x64xf32>
+    %4707 = "ttir.relu"(%4705, %4706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4708 = tensor.empty() : tensor<1x64xf32>
+    %4709 = "ttir.relu"(%4707, %4708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4710 = tensor.empty() : tensor<1x64xf32>
+    %4711 = "ttir.relu"(%4709, %4710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4712 = tensor.empty() : tensor<1x64xf32>
+    %4713 = "ttir.relu"(%4711, %4712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4714 = tensor.empty() : tensor<1x64xf32>
+    %4715 = "ttir.relu"(%4713, %4714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4716 = tensor.empty() : tensor<1x64xf32>
+    %4717 = "ttir.relu"(%4715, %4716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4718 = tensor.empty() : tensor<1x64xf32>
+    %4719 = "ttir.relu"(%4717, %4718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4720 = tensor.empty() : tensor<1x64xf32>
+    %4721 = "ttir.relu"(%4719, %4720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4722 = tensor.empty() : tensor<1x64xf32>
+    %4723 = "ttir.relu"(%4721, %4722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4724 = tensor.empty() : tensor<1x64xf32>
+    %4725 = "ttir.relu"(%4723, %4724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4726 = tensor.empty() : tensor<1x64xf32>
+    %4727 = "ttir.relu"(%4725, %4726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4728 = tensor.empty() : tensor<1x64xf32>
+    %4729 = "ttir.relu"(%4727, %4728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4730 = tensor.empty() : tensor<1x64xf32>
+    %4731 = "ttir.relu"(%4729, %4730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4732 = tensor.empty() : tensor<1x64xf32>
+    %4733 = "ttir.relu"(%4731, %4732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4734 = tensor.empty() : tensor<1x64xf32>
+    %4735 = "ttir.relu"(%4733, %4734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4736 = tensor.empty() : tensor<1x64xf32>
+    %4737 = "ttir.relu"(%4735, %4736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4738 = tensor.empty() : tensor<1x64xf32>
+    %4739 = "ttir.relu"(%4737, %4738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4740 = tensor.empty() : tensor<1x64xf32>
+    %4741 = "ttir.relu"(%4739, %4740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4742 = tensor.empty() : tensor<1x64xf32>
+    %4743 = "ttir.relu"(%4741, %4742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4744 = tensor.empty() : tensor<1x64xf32>
+    %4745 = "ttir.relu"(%4743, %4744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4746 = tensor.empty() : tensor<1x64xf32>
+    %4747 = "ttir.relu"(%4745, %4746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4748 = tensor.empty() : tensor<1x64xf32>
+    %4749 = "ttir.relu"(%4747, %4748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4750 = tensor.empty() : tensor<1x64xf32>
+    %4751 = "ttir.relu"(%4749, %4750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4752 = tensor.empty() : tensor<1x64xf32>
+    %4753 = "ttir.relu"(%4751, %4752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4754 = tensor.empty() : tensor<1x64xf32>
+    %4755 = "ttir.relu"(%4753, %4754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4756 = tensor.empty() : tensor<1x64xf32>
+    %4757 = "ttir.relu"(%4755, %4756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4758 = tensor.empty() : tensor<1x64xf32>
+    %4759 = "ttir.relu"(%4757, %4758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4760 = tensor.empty() : tensor<1x64xf32>
+    %4761 = "ttir.relu"(%4759, %4760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4762 = tensor.empty() : tensor<1x64xf32>
+    %4763 = "ttir.relu"(%4761, %4762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4764 = tensor.empty() : tensor<1x64xf32>
+    %4765 = "ttir.relu"(%4763, %4764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4766 = tensor.empty() : tensor<1x64xf32>
+    %4767 = "ttir.relu"(%4765, %4766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4768 = tensor.empty() : tensor<1x64xf32>
+    %4769 = "ttir.relu"(%4767, %4768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4770 = tensor.empty() : tensor<1x64xf32>
+    %4771 = "ttir.relu"(%4769, %4770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4772 = tensor.empty() : tensor<1x64xf32>
+    %4773 = "ttir.relu"(%4771, %4772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4774 = tensor.empty() : tensor<1x64xf32>
+    %4775 = "ttir.relu"(%4773, %4774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4776 = tensor.empty() : tensor<1x64xf32>
+    %4777 = "ttir.relu"(%4775, %4776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4778 = tensor.empty() : tensor<1x64xf32>
+    %4779 = "ttir.relu"(%4777, %4778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4780 = tensor.empty() : tensor<1x64xf32>
+    %4781 = "ttir.relu"(%4779, %4780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4782 = tensor.empty() : tensor<1x64xf32>
+    %4783 = "ttir.relu"(%4781, %4782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4784 = tensor.empty() : tensor<1x64xf32>
+    %4785 = "ttir.relu"(%4783, %4784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4786 = tensor.empty() : tensor<1x64xf32>
+    %4787 = "ttir.relu"(%4785, %4786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4788 = tensor.empty() : tensor<1x64xf32>
+    %4789 = "ttir.relu"(%4787, %4788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4790 = tensor.empty() : tensor<1x64xf32>
+    %4791 = "ttir.relu"(%4789, %4790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4792 = tensor.empty() : tensor<1x64xf32>
+    %4793 = "ttir.relu"(%4791, %4792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4794 = tensor.empty() : tensor<1x64xf32>
+    %4795 = "ttir.relu"(%4793, %4794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4796 = tensor.empty() : tensor<1x64xf32>
+    %4797 = "ttir.relu"(%4795, %4796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4798 = tensor.empty() : tensor<1x64xf32>
+    %4799 = "ttir.relu"(%4797, %4798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4800 = tensor.empty() : tensor<1x64xf32>
+    %4801 = "ttir.relu"(%4799, %4800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4802 = tensor.empty() : tensor<1x64xf32>
+    %4803 = "ttir.relu"(%4801, %4802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4804 = tensor.empty() : tensor<1x64xf32>
+    %4805 = "ttir.relu"(%4803, %4804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4806 = tensor.empty() : tensor<1x64xf32>
+    %4807 = "ttir.relu"(%4805, %4806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4808 = tensor.empty() : tensor<1x64xf32>
+    %4809 = "ttir.relu"(%4807, %4808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4810 = tensor.empty() : tensor<1x64xf32>
+    %4811 = "ttir.relu"(%4809, %4810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4812 = tensor.empty() : tensor<1x64xf32>
+    %4813 = "ttir.relu"(%4811, %4812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4814 = tensor.empty() : tensor<1x64xf32>
+    %4815 = "ttir.relu"(%4813, %4814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4816 = tensor.empty() : tensor<1x64xf32>
+    %4817 = "ttir.relu"(%4815, %4816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4818 = tensor.empty() : tensor<1x64xf32>
+    %4819 = "ttir.relu"(%4817, %4818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4820 = tensor.empty() : tensor<1x64xf32>
+    %4821 = "ttir.relu"(%4819, %4820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4822 = tensor.empty() : tensor<1x64xf32>
+    %4823 = "ttir.relu"(%4821, %4822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4824 = tensor.empty() : tensor<1x64xf32>
+    %4825 = "ttir.relu"(%4823, %4824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4826 = tensor.empty() : tensor<1x64xf32>
+    %4827 = "ttir.relu"(%4825, %4826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4828 = tensor.empty() : tensor<1x64xf32>
+    %4829 = "ttir.relu"(%4827, %4828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4830 = tensor.empty() : tensor<1x64xf32>
+    %4831 = "ttir.relu"(%4829, %4830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4832 = tensor.empty() : tensor<1x64xf32>
+    %4833 = "ttir.relu"(%4831, %4832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4834 = tensor.empty() : tensor<1x64xf32>
+    %4835 = "ttir.relu"(%4833, %4834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4836 = tensor.empty() : tensor<1x64xf32>
+    %4837 = "ttir.relu"(%4835, %4836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4838 = tensor.empty() : tensor<1x64xf32>
+    %4839 = "ttir.relu"(%4837, %4838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4840 = tensor.empty() : tensor<1x64xf32>
+    %4841 = "ttir.relu"(%4839, %4840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4842 = tensor.empty() : tensor<1x64xf32>
+    %4843 = "ttir.relu"(%4841, %4842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4844 = tensor.empty() : tensor<1x64xf32>
+    %4845 = "ttir.relu"(%4843, %4844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4846 = tensor.empty() : tensor<1x64xf32>
+    %4847 = "ttir.relu"(%4845, %4846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4848 = tensor.empty() : tensor<1x64xf32>
+    %4849 = "ttir.relu"(%4847, %4848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4850 = tensor.empty() : tensor<1x64xf32>
+    %4851 = "ttir.relu"(%4849, %4850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4852 = tensor.empty() : tensor<1x64xf32>
+    %4853 = "ttir.relu"(%4851, %4852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4854 = tensor.empty() : tensor<1x64xf32>
+    %4855 = "ttir.relu"(%4853, %4854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4856 = tensor.empty() : tensor<1x64xf32>
+    %4857 = "ttir.relu"(%4855, %4856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4858 = tensor.empty() : tensor<1x64xf32>
+    %4859 = "ttir.relu"(%4857, %4858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4860 = tensor.empty() : tensor<1x64xf32>
+    %4861 = "ttir.relu"(%4859, %4860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4862 = tensor.empty() : tensor<1x64xf32>
+    %4863 = "ttir.relu"(%4861, %4862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4864 = tensor.empty() : tensor<1x64xf32>
+    %4865 = "ttir.relu"(%4863, %4864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4866 = tensor.empty() : tensor<1x64xf32>
+    %4867 = "ttir.relu"(%4865, %4866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4868 = tensor.empty() : tensor<1x64xf32>
+    %4869 = "ttir.relu"(%4867, %4868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4870 = tensor.empty() : tensor<1x64xf32>
+    %4871 = "ttir.relu"(%4869, %4870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4872 = tensor.empty() : tensor<1x64xf32>
+    %4873 = "ttir.relu"(%4871, %4872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4874 = tensor.empty() : tensor<1x64xf32>
+    %4875 = "ttir.relu"(%4873, %4874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4876 = tensor.empty() : tensor<1x64xf32>
+    %4877 = "ttir.relu"(%4875, %4876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4878 = tensor.empty() : tensor<1x64xf32>
+    %4879 = "ttir.relu"(%4877, %4878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4880 = tensor.empty() : tensor<1x64xf32>
+    %4881 = "ttir.relu"(%4879, %4880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4882 = tensor.empty() : tensor<1x64xf32>
+    %4883 = "ttir.relu"(%4881, %4882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4884 = tensor.empty() : tensor<1x64xf32>
+    %4885 = "ttir.relu"(%4883, %4884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4886 = tensor.empty() : tensor<1x64xf32>
+    %4887 = "ttir.relu"(%4885, %4886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4888 = tensor.empty() : tensor<1x64xf32>
+    %4889 = "ttir.relu"(%4887, %4888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4890 = tensor.empty() : tensor<1x64xf32>
+    %4891 = "ttir.relu"(%4889, %4890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4892 = tensor.empty() : tensor<1x64xf32>
+    %4893 = "ttir.relu"(%4891, %4892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4894 = tensor.empty() : tensor<1x64xf32>
+    %4895 = "ttir.relu"(%4893, %4894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4896 = tensor.empty() : tensor<1x64xf32>
+    %4897 = "ttir.relu"(%4895, %4896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4898 = tensor.empty() : tensor<1x64xf32>
+    %4899 = "ttir.relu"(%4897, %4898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4900 = tensor.empty() : tensor<1x64xf32>
+    %4901 = "ttir.relu"(%4899, %4900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4902 = tensor.empty() : tensor<1x64xf32>
+    %4903 = "ttir.relu"(%4901, %4902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4904 = tensor.empty() : tensor<1x64xf32>
+    %4905 = "ttir.relu"(%4903, %4904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4906 = tensor.empty() : tensor<1x64xf32>
+    %4907 = "ttir.relu"(%4905, %4906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4908 = tensor.empty() : tensor<1x64xf32>
+    %4909 = "ttir.relu"(%4907, %4908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4910 = tensor.empty() : tensor<1x64xf32>
+    %4911 = "ttir.relu"(%4909, %4910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4912 = tensor.empty() : tensor<1x64xf32>
+    %4913 = "ttir.relu"(%4911, %4912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4914 = tensor.empty() : tensor<1x64xf32>
+    %4915 = "ttir.relu"(%4913, %4914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4916 = tensor.empty() : tensor<1x64xf32>
+    %4917 = "ttir.relu"(%4915, %4916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4918 = tensor.empty() : tensor<1x64xf32>
+    %4919 = "ttir.relu"(%4917, %4918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4920 = tensor.empty() : tensor<1x64xf32>
+    %4921 = "ttir.relu"(%4919, %4920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4922 = tensor.empty() : tensor<1x64xf32>
+    %4923 = "ttir.relu"(%4921, %4922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4924 = tensor.empty() : tensor<1x64xf32>
+    %4925 = "ttir.relu"(%4923, %4924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4926 = tensor.empty() : tensor<1x64xf32>
+    %4927 = "ttir.relu"(%4925, %4926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4928 = tensor.empty() : tensor<1x64xf32>
+    %4929 = "ttir.relu"(%4927, %4928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4930 = tensor.empty() : tensor<1x64xf32>
+    %4931 = "ttir.relu"(%4929, %4930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4932 = tensor.empty() : tensor<1x64xf32>
+    %4933 = "ttir.relu"(%4931, %4932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4934 = tensor.empty() : tensor<1x64xf32>
+    %4935 = "ttir.relu"(%4933, %4934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4936 = tensor.empty() : tensor<1x64xf32>
+    %4937 = "ttir.relu"(%4935, %4936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4938 = tensor.empty() : tensor<1x64xf32>
+    %4939 = "ttir.relu"(%4937, %4938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4940 = tensor.empty() : tensor<1x64xf32>
+    %4941 = "ttir.relu"(%4939, %4940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4942 = tensor.empty() : tensor<1x64xf32>
+    %4943 = "ttir.relu"(%4941, %4942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4944 = tensor.empty() : tensor<1x64xf32>
+    %4945 = "ttir.relu"(%4943, %4944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4946 = tensor.empty() : tensor<1x64xf32>
+    %4947 = "ttir.relu"(%4945, %4946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4948 = tensor.empty() : tensor<1x64xf32>
+    %4949 = "ttir.relu"(%4947, %4948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4950 = tensor.empty() : tensor<1x64xf32>
+    %4951 = "ttir.relu"(%4949, %4950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4952 = tensor.empty() : tensor<1x64xf32>
+    %4953 = "ttir.relu"(%4951, %4952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4954 = tensor.empty() : tensor<1x64xf32>
+    %4955 = "ttir.relu"(%4953, %4954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4956 = tensor.empty() : tensor<1x64xf32>
+    %4957 = "ttir.relu"(%4955, %4956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4958 = tensor.empty() : tensor<1x64xf32>
+    %4959 = "ttir.relu"(%4957, %4958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4960 = tensor.empty() : tensor<1x64xf32>
+    %4961 = "ttir.relu"(%4959, %4960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4962 = tensor.empty() : tensor<1x64xf32>
+    %4963 = "ttir.relu"(%4961, %4962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4964 = tensor.empty() : tensor<1x64xf32>
+    %4965 = "ttir.relu"(%4963, %4964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4966 = tensor.empty() : tensor<1x64xf32>
+    %4967 = "ttir.relu"(%4965, %4966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4968 = tensor.empty() : tensor<1x64xf32>
+    %4969 = "ttir.relu"(%4967, %4968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4970 = tensor.empty() : tensor<1x64xf32>
+    %4971 = "ttir.relu"(%4969, %4970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4972 = tensor.empty() : tensor<1x64xf32>
+    %4973 = "ttir.relu"(%4971, %4972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4974 = tensor.empty() : tensor<1x64xf32>
+    %4975 = "ttir.relu"(%4973, %4974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4976 = tensor.empty() : tensor<1x64xf32>
+    %4977 = "ttir.relu"(%4975, %4976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4978 = tensor.empty() : tensor<1x64xf32>
+    %4979 = "ttir.relu"(%4977, %4978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4980 = tensor.empty() : tensor<1x64xf32>
+    %4981 = "ttir.relu"(%4979, %4980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4982 = tensor.empty() : tensor<1x64xf32>
+    %4983 = "ttir.relu"(%4981, %4982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4984 = tensor.empty() : tensor<1x64xf32>
+    %4985 = "ttir.relu"(%4983, %4984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4986 = tensor.empty() : tensor<1x64xf32>
+    %4987 = "ttir.relu"(%4985, %4986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4988 = tensor.empty() : tensor<1x64xf32>
+    %4989 = "ttir.relu"(%4987, %4988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4990 = tensor.empty() : tensor<1x64xf32>
+    %4991 = "ttir.relu"(%4989, %4990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4992 = tensor.empty() : tensor<1x64xf32>
+    %4993 = "ttir.relu"(%4991, %4992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4994 = tensor.empty() : tensor<1x64xf32>
+    %4995 = "ttir.relu"(%4993, %4994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4996 = tensor.empty() : tensor<1x64xf32>
+    %4997 = "ttir.relu"(%4995, %4996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4998 = tensor.empty() : tensor<1x64xf32>
+    %4999 = "ttir.relu"(%4997, %4998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5000 = tensor.empty() : tensor<1x64xf32>
+    %5001 = "ttir.relu"(%4999, %5000) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5002 = tensor.empty() : tensor<1x64xf32>
+    %5003 = "ttir.relu"(%5001, %5002) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5004 = tensor.empty() : tensor<1x64xf32>
+    %5005 = "ttir.relu"(%5003, %5004) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5006 = tensor.empty() : tensor<1x64xf32>
+    %5007 = "ttir.relu"(%5005, %5006) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5008 = tensor.empty() : tensor<1x64xf32>
+    %5009 = "ttir.relu"(%5007, %5008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5010 = tensor.empty() : tensor<1x64xf32>
+    %5011 = "ttir.relu"(%5009, %5010) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5012 = tensor.empty() : tensor<1x64xf32>
+    %5013 = "ttir.relu"(%5011, %5012) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5014 = tensor.empty() : tensor<1x64xf32>
+    %5015 = "ttir.relu"(%5013, %5014) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5016 = tensor.empty() : tensor<1x64xf32>
+    %5017 = "ttir.relu"(%5015, %5016) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5018 = tensor.empty() : tensor<1x64xf32>
+    %5019 = "ttir.relu"(%5017, %5018) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5020 = tensor.empty() : tensor<1x64xf32>
+    %5021 = "ttir.relu"(%5019, %5020) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5022 = tensor.empty() : tensor<1x64xf32>
+    %5023 = "ttir.relu"(%5021, %5022) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5024 = tensor.empty() : tensor<1x64xf32>
+    %5025 = "ttir.relu"(%5023, %5024) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5026 = tensor.empty() : tensor<1x64xf32>
+    %5027 = "ttir.relu"(%5025, %5026) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5028 = tensor.empty() : tensor<1x64xf32>
+    %5029 = "ttir.relu"(%5027, %5028) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5030 = tensor.empty() : tensor<1x64xf32>
+    %5031 = "ttir.relu"(%5029, %5030) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5032 = tensor.empty() : tensor<1x64xf32>
+    %5033 = "ttir.relu"(%5031, %5032) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5034 = tensor.empty() : tensor<1x64xf32>
+    %5035 = "ttir.relu"(%5033, %5034) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5036 = tensor.empty() : tensor<1x64xf32>
+    %5037 = "ttir.relu"(%5035, %5036) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5038 = tensor.empty() : tensor<1x64xf32>
+    %5039 = "ttir.relu"(%5037, %5038) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5040 = tensor.empty() : tensor<1x64xf32>
+    %5041 = "ttir.relu"(%5039, %5040) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5042 = tensor.empty() : tensor<1x64xf32>
+    %5043 = "ttir.relu"(%5041, %5042) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5044 = tensor.empty() : tensor<1x64xf32>
+    %5045 = "ttir.relu"(%5043, %5044) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5046 = tensor.empty() : tensor<1x64xf32>
+    %5047 = "ttir.relu"(%5045, %5046) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5048 = tensor.empty() : tensor<1x64xf32>
+    %5049 = "ttir.relu"(%5047, %5048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5050 = tensor.empty() : tensor<1x64xf32>
+    %5051 = "ttir.relu"(%5049, %5050) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5052 = tensor.empty() : tensor<1x64xf32>
+    %5053 = "ttir.relu"(%5051, %5052) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5054 = tensor.empty() : tensor<1x64xf32>
+    %5055 = "ttir.relu"(%5053, %5054) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5056 = tensor.empty() : tensor<1x64xf32>
+    %5057 = "ttir.relu"(%5055, %5056) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5058 = tensor.empty() : tensor<1x64xf32>
+    %5059 = "ttir.relu"(%5057, %5058) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5060 = tensor.empty() : tensor<1x64xf32>
+    %5061 = "ttir.relu"(%5059, %5060) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5062 = tensor.empty() : tensor<1x64xf32>
+    %5063 = "ttir.relu"(%5061, %5062) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5064 = tensor.empty() : tensor<1x64xf32>
+    %5065 = "ttir.relu"(%5063, %5064) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5066 = tensor.empty() : tensor<1x64xf32>
+    %5067 = "ttir.relu"(%5065, %5066) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5068 = tensor.empty() : tensor<1x64xf32>
+    %5069 = "ttir.relu"(%5067, %5068) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5070 = tensor.empty() : tensor<1x64xf32>
+    %5071 = "ttir.relu"(%5069, %5070) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5072 = tensor.empty() : tensor<1x64xf32>
+    %5073 = "ttir.relu"(%5071, %5072) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5074 = tensor.empty() : tensor<1x64xf32>
+    %5075 = "ttir.relu"(%5073, %5074) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5076 = tensor.empty() : tensor<1x64xf32>
+    %5077 = "ttir.relu"(%5075, %5076) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5078 = tensor.empty() : tensor<1x64xf32>
+    %5079 = "ttir.relu"(%5077, %5078) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5080 = tensor.empty() : tensor<1x64xf32>
+    %5081 = "ttir.relu"(%5079, %5080) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5082 = tensor.empty() : tensor<1x64xf32>
+    %5083 = "ttir.relu"(%5081, %5082) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5084 = tensor.empty() : tensor<1x64xf32>
+    %5085 = "ttir.relu"(%5083, %5084) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5086 = tensor.empty() : tensor<1x64xf32>
+    %5087 = "ttir.relu"(%5085, %5086) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5088 = tensor.empty() : tensor<1x64xf32>
+    %5089 = "ttir.relu"(%5087, %5088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5090 = tensor.empty() : tensor<1x64xf32>
+    %5091 = "ttir.relu"(%5089, %5090) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5092 = tensor.empty() : tensor<1x64xf32>
+    %5093 = "ttir.relu"(%5091, %5092) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5094 = tensor.empty() : tensor<1x64xf32>
+    %5095 = "ttir.relu"(%5093, %5094) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5096 = tensor.empty() : tensor<1x64xf32>
+    %5097 = "ttir.relu"(%5095, %5096) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5098 = tensor.empty() : tensor<1x64xf32>
+    %5099 = "ttir.relu"(%5097, %5098) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5100 = tensor.empty() : tensor<1x64xf32>
+    %5101 = "ttir.relu"(%5099, %5100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5102 = tensor.empty() : tensor<1x64xf32>
+    %5103 = "ttir.relu"(%5101, %5102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5104 = tensor.empty() : tensor<1x64xf32>
+    %5105 = "ttir.relu"(%5103, %5104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5106 = tensor.empty() : tensor<1x64xf32>
+    %5107 = "ttir.relu"(%5105, %5106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5108 = tensor.empty() : tensor<1x64xf32>
+    %5109 = "ttir.relu"(%5107, %5108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5110 = tensor.empty() : tensor<1x64xf32>
+    %5111 = "ttir.relu"(%5109, %5110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5112 = tensor.empty() : tensor<1x64xf32>
+    %5113 = "ttir.relu"(%5111, %5112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5114 = tensor.empty() : tensor<1x64xf32>
+    %5115 = "ttir.relu"(%5113, %5114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5116 = tensor.empty() : tensor<1x64xf32>
+    %5117 = "ttir.relu"(%5115, %5116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5118 = tensor.empty() : tensor<1x64xf32>
+    %5119 = "ttir.relu"(%5117, %5118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5120 = tensor.empty() : tensor<1x64xf32>
+    %5121 = "ttir.relu"(%5119, %5120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5122 = tensor.empty() : tensor<1x64xf32>
+    %5123 = "ttir.relu"(%5121, %5122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5124 = tensor.empty() : tensor<1x64xf32>
+    %5125 = "ttir.relu"(%5123, %5124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5126 = tensor.empty() : tensor<1x64xf32>
+    %5127 = "ttir.relu"(%5125, %5126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5128 = tensor.empty() : tensor<1x64xf32>
+    %5129 = "ttir.relu"(%5127, %5128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5130 = tensor.empty() : tensor<1x64xf32>
+    %5131 = "ttir.relu"(%5129, %5130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5132 = tensor.empty() : tensor<1x64xf32>
+    %5133 = "ttir.relu"(%5131, %5132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5134 = tensor.empty() : tensor<1x64xf32>
+    %5135 = "ttir.relu"(%5133, %5134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5136 = tensor.empty() : tensor<1x64xf32>
+    %5137 = "ttir.relu"(%5135, %5136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5138 = tensor.empty() : tensor<1x64xf32>
+    %5139 = "ttir.relu"(%5137, %5138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5140 = tensor.empty() : tensor<1x64xf32>
+    %5141 = "ttir.relu"(%5139, %5140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5142 = tensor.empty() : tensor<1x64xf32>
+    %5143 = "ttir.relu"(%5141, %5142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5144 = tensor.empty() : tensor<1x64xf32>
+    %5145 = "ttir.relu"(%5143, %5144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5146 = tensor.empty() : tensor<1x64xf32>
+    %5147 = "ttir.relu"(%5145, %5146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5148 = tensor.empty() : tensor<1x64xf32>
+    %5149 = "ttir.relu"(%5147, %5148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5150 = tensor.empty() : tensor<1x64xf32>
+    %5151 = "ttir.relu"(%5149, %5150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5152 = tensor.empty() : tensor<1x64xf32>
+    %5153 = "ttir.relu"(%5151, %5152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5154 = tensor.empty() : tensor<1x64xf32>
+    %5155 = "ttir.relu"(%5153, %5154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5156 = tensor.empty() : tensor<1x64xf32>
+    %5157 = "ttir.relu"(%5155, %5156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5158 = tensor.empty() : tensor<1x64xf32>
+    %5159 = "ttir.relu"(%5157, %5158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5160 = tensor.empty() : tensor<1x64xf32>
+    %5161 = "ttir.relu"(%5159, %5160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5162 = tensor.empty() : tensor<1x64xf32>
+    %5163 = "ttir.relu"(%5161, %5162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5164 = tensor.empty() : tensor<1x64xf32>
+    %5165 = "ttir.relu"(%5163, %5164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5166 = tensor.empty() : tensor<1x64xf32>
+    %5167 = "ttir.relu"(%5165, %5166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5168 = tensor.empty() : tensor<1x64xf32>
+    %5169 = "ttir.relu"(%5167, %5168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5170 = tensor.empty() : tensor<1x64xf32>
+    %5171 = "ttir.relu"(%5169, %5170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5172 = tensor.empty() : tensor<1x64xf32>
+    %5173 = "ttir.relu"(%5171, %5172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5174 = tensor.empty() : tensor<1x64xf32>
+    %5175 = "ttir.relu"(%5173, %5174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5176 = tensor.empty() : tensor<1x64xf32>
+    %5177 = "ttir.relu"(%5175, %5176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5178 = tensor.empty() : tensor<1x64xf32>
+    %5179 = "ttir.relu"(%5177, %5178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5180 = tensor.empty() : tensor<1x64xf32>
+    %5181 = "ttir.relu"(%5179, %5180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5182 = tensor.empty() : tensor<1x64xf32>
+    %5183 = "ttir.relu"(%5181, %5182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5184 = tensor.empty() : tensor<1x64xf32>
+    %5185 = "ttir.relu"(%5183, %5184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5186 = tensor.empty() : tensor<1x64xf32>
+    %5187 = "ttir.relu"(%5185, %5186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5188 = tensor.empty() : tensor<1x64xf32>
+    %5189 = "ttir.relu"(%5187, %5188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5190 = tensor.empty() : tensor<1x64xf32>
+    %5191 = "ttir.relu"(%5189, %5190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5192 = tensor.empty() : tensor<1x64xf32>
+    %5193 = "ttir.relu"(%5191, %5192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5194 = tensor.empty() : tensor<1x64xf32>
+    %5195 = "ttir.relu"(%5193, %5194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5196 = tensor.empty() : tensor<1x64xf32>
+    %5197 = "ttir.relu"(%5195, %5196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5198 = tensor.empty() : tensor<1x64xf32>
+    %5199 = "ttir.relu"(%5197, %5198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5200 = tensor.empty() : tensor<1x64xf32>
+    %5201 = "ttir.relu"(%5199, %5200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5202 = tensor.empty() : tensor<1x64xf32>
+    %5203 = "ttir.relu"(%5201, %5202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5204 = tensor.empty() : tensor<1x64xf32>
+    %5205 = "ttir.relu"(%5203, %5204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5206 = tensor.empty() : tensor<1x64xf32>
+    %5207 = "ttir.relu"(%5205, %5206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5208 = tensor.empty() : tensor<1x64xf32>
+    %5209 = "ttir.relu"(%5207, %5208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5210 = tensor.empty() : tensor<1x64xf32>
+    %5211 = "ttir.relu"(%5209, %5210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5212 = tensor.empty() : tensor<1x64xf32>
+    %5213 = "ttir.relu"(%5211, %5212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5214 = tensor.empty() : tensor<1x64xf32>
+    %5215 = "ttir.relu"(%5213, %5214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5216 = tensor.empty() : tensor<1x64xf32>
+    %5217 = "ttir.relu"(%5215, %5216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5218 = tensor.empty() : tensor<1x64xf32>
+    %5219 = "ttir.relu"(%5217, %5218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5220 = tensor.empty() : tensor<1x64xf32>
+    %5221 = "ttir.relu"(%5219, %5220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5222 = tensor.empty() : tensor<1x64xf32>
+    %5223 = "ttir.relu"(%5221, %5222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5224 = tensor.empty() : tensor<1x64xf32>
+    %5225 = "ttir.relu"(%5223, %5224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5226 = tensor.empty() : tensor<1x64xf32>
+    %5227 = "ttir.relu"(%5225, %5226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5228 = tensor.empty() : tensor<1x64xf32>
+    %5229 = "ttir.relu"(%5227, %5228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5230 = tensor.empty() : tensor<1x64xf32>
+    %5231 = "ttir.relu"(%5229, %5230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5232 = tensor.empty() : tensor<1x64xf32>
+    %5233 = "ttir.relu"(%5231, %5232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5234 = tensor.empty() : tensor<1x64xf32>
+    %5235 = "ttir.relu"(%5233, %5234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5236 = tensor.empty() : tensor<1x64xf32>
+    %5237 = "ttir.relu"(%5235, %5236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5238 = tensor.empty() : tensor<1x64xf32>
+    %5239 = "ttir.relu"(%5237, %5238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5240 = tensor.empty() : tensor<1x64xf32>
+    %5241 = "ttir.relu"(%5239, %5240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5242 = tensor.empty() : tensor<1x64xf32>
+    %5243 = "ttir.relu"(%5241, %5242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5244 = tensor.empty() : tensor<1x64xf32>
+    %5245 = "ttir.relu"(%5243, %5244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5246 = tensor.empty() : tensor<1x64xf32>
+    %5247 = "ttir.relu"(%5245, %5246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5248 = tensor.empty() : tensor<1x64xf32>
+    %5249 = "ttir.relu"(%5247, %5248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5250 = tensor.empty() : tensor<1x64xf32>
+    %5251 = "ttir.relu"(%5249, %5250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5252 = tensor.empty() : tensor<1x64xf32>
+    %5253 = "ttir.relu"(%5251, %5252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5254 = tensor.empty() : tensor<1x64xf32>
+    %5255 = "ttir.relu"(%5253, %5254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5256 = tensor.empty() : tensor<1x64xf32>
+    %5257 = "ttir.relu"(%5255, %5256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5258 = tensor.empty() : tensor<1x64xf32>
+    %5259 = "ttir.relu"(%5257, %5258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5260 = tensor.empty() : tensor<1x64xf32>
+    %5261 = "ttir.relu"(%5259, %5260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5262 = tensor.empty() : tensor<1x64xf32>
+    %5263 = "ttir.relu"(%5261, %5262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5264 = tensor.empty() : tensor<1x64xf32>
+    %5265 = "ttir.relu"(%5263, %5264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5266 = tensor.empty() : tensor<1x64xf32>
+    %5267 = "ttir.relu"(%5265, %5266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5268 = tensor.empty() : tensor<1x64xf32>
+    %5269 = "ttir.relu"(%5267, %5268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5270 = tensor.empty() : tensor<1x64xf32>
+    %5271 = "ttir.relu"(%5269, %5270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5272 = tensor.empty() : tensor<1x64xf32>
+    %5273 = "ttir.relu"(%5271, %5272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5274 = tensor.empty() : tensor<1x64xf32>
+    %5275 = "ttir.relu"(%5273, %5274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5276 = tensor.empty() : tensor<1x64xf32>
+    %5277 = "ttir.relu"(%5275, %5276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5278 = tensor.empty() : tensor<1x64xf32>
+    %5279 = "ttir.relu"(%5277, %5278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5280 = tensor.empty() : tensor<1x64xf32>
+    %5281 = "ttir.relu"(%5279, %5280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5282 = tensor.empty() : tensor<1x64xf32>
+    %5283 = "ttir.relu"(%5281, %5282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5284 = tensor.empty() : tensor<1x64xf32>
+    %5285 = "ttir.relu"(%5283, %5284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5286 = tensor.empty() : tensor<1x64xf32>
+    %5287 = "ttir.relu"(%5285, %5286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5288 = tensor.empty() : tensor<1x64xf32>
+    %5289 = "ttir.relu"(%5287, %5288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5290 = tensor.empty() : tensor<1x64xf32>
+    %5291 = "ttir.relu"(%5289, %5290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5292 = tensor.empty() : tensor<1x64xf32>
+    %5293 = "ttir.relu"(%5291, %5292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5294 = tensor.empty() : tensor<1x64xf32>
+    %5295 = "ttir.relu"(%5293, %5294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5296 = tensor.empty() : tensor<1x64xf32>
+    %5297 = "ttir.relu"(%5295, %5296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5298 = tensor.empty() : tensor<1x64xf32>
+    %5299 = "ttir.relu"(%5297, %5298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5300 = tensor.empty() : tensor<1x64xf32>
+    %5301 = "ttir.relu"(%5299, %5300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5302 = tensor.empty() : tensor<1x64xf32>
+    %5303 = "ttir.relu"(%5301, %5302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5304 = tensor.empty() : tensor<1x64xf32>
+    %5305 = "ttir.relu"(%5303, %5304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5306 = tensor.empty() : tensor<1x64xf32>
+    %5307 = "ttir.relu"(%5305, %5306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5308 = tensor.empty() : tensor<1x64xf32>
+    %5309 = "ttir.relu"(%5307, %5308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5310 = tensor.empty() : tensor<1x64xf32>
+    %5311 = "ttir.relu"(%5309, %5310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5312 = tensor.empty() : tensor<1x64xf32>
+    %5313 = "ttir.relu"(%5311, %5312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5314 = tensor.empty() : tensor<1x64xf32>
+    %5315 = "ttir.relu"(%5313, %5314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5316 = tensor.empty() : tensor<1x64xf32>
+    %5317 = "ttir.relu"(%5315, %5316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5318 = tensor.empty() : tensor<1x64xf32>
+    %5319 = "ttir.relu"(%5317, %5318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5320 = tensor.empty() : tensor<1x64xf32>
+    %5321 = "ttir.relu"(%5319, %5320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5322 = tensor.empty() : tensor<1x64xf32>
+    %5323 = "ttir.relu"(%5321, %5322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5324 = tensor.empty() : tensor<1x64xf32>
+    %5325 = "ttir.relu"(%5323, %5324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5326 = tensor.empty() : tensor<1x64xf32>
+    %5327 = "ttir.relu"(%5325, %5326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5328 = tensor.empty() : tensor<1x64xf32>
+    %5329 = "ttir.relu"(%5327, %5328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5330 = tensor.empty() : tensor<1x64xf32>
+    %5331 = "ttir.relu"(%5329, %5330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5332 = tensor.empty() : tensor<1x64xf32>
+    %5333 = "ttir.relu"(%5331, %5332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5334 = tensor.empty() : tensor<1x64xf32>
+    %5335 = "ttir.relu"(%5333, %5334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5336 = tensor.empty() : tensor<1x64xf32>
+    %5337 = "ttir.relu"(%5335, %5336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5338 = tensor.empty() : tensor<1x64xf32>
+    %5339 = "ttir.relu"(%5337, %5338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5340 = tensor.empty() : tensor<1x64xf32>
+    %5341 = "ttir.relu"(%5339, %5340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5342 = tensor.empty() : tensor<1x64xf32>
+    %5343 = "ttir.relu"(%5341, %5342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5344 = tensor.empty() : tensor<1x64xf32>
+    %5345 = "ttir.relu"(%5343, %5344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5346 = tensor.empty() : tensor<1x64xf32>
+    %5347 = "ttir.relu"(%5345, %5346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5348 = tensor.empty() : tensor<1x64xf32>
+    %5349 = "ttir.relu"(%5347, %5348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5350 = tensor.empty() : tensor<1x64xf32>
+    %5351 = "ttir.relu"(%5349, %5350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5352 = tensor.empty() : tensor<1x64xf32>
+    %5353 = "ttir.relu"(%5351, %5352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5354 = tensor.empty() : tensor<1x64xf32>
+    %5355 = "ttir.relu"(%5353, %5354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5356 = tensor.empty() : tensor<1x64xf32>
+    %5357 = "ttir.relu"(%5355, %5356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5358 = tensor.empty() : tensor<1x64xf32>
+    %5359 = "ttir.relu"(%5357, %5358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5360 = tensor.empty() : tensor<1x64xf32>
+    %5361 = "ttir.relu"(%5359, %5360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5362 = tensor.empty() : tensor<1x64xf32>
+    %5363 = "ttir.relu"(%5361, %5362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5364 = tensor.empty() : tensor<1x64xf32>
+    %5365 = "ttir.relu"(%5363, %5364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5366 = tensor.empty() : tensor<1x64xf32>
+    %5367 = "ttir.relu"(%5365, %5366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5368 = tensor.empty() : tensor<1x64xf32>
+    %5369 = "ttir.relu"(%5367, %5368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5370 = tensor.empty() : tensor<1x64xf32>
+    %5371 = "ttir.relu"(%5369, %5370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5372 = tensor.empty() : tensor<1x64xf32>
+    %5373 = "ttir.relu"(%5371, %5372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5374 = tensor.empty() : tensor<1x64xf32>
+    %5375 = "ttir.relu"(%5373, %5374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5376 = tensor.empty() : tensor<1x64xf32>
+    %5377 = "ttir.relu"(%5375, %5376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5378 = tensor.empty() : tensor<1x64xf32>
+    %5379 = "ttir.relu"(%5377, %5378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5380 = tensor.empty() : tensor<1x64xf32>
+    %5381 = "ttir.relu"(%5379, %5380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5382 = tensor.empty() : tensor<1x64xf32>
+    %5383 = "ttir.relu"(%5381, %5382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5384 = tensor.empty() : tensor<1x64xf32>
+    %5385 = "ttir.relu"(%5383, %5384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5386 = tensor.empty() : tensor<1x64xf32>
+    %5387 = "ttir.relu"(%5385, %5386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5388 = tensor.empty() : tensor<1x64xf32>
+    %5389 = "ttir.relu"(%5387, %5388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5390 = tensor.empty() : tensor<1x64xf32>
+    %5391 = "ttir.relu"(%5389, %5390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5392 = tensor.empty() : tensor<1x64xf32>
+    %5393 = "ttir.relu"(%5391, %5392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5394 = tensor.empty() : tensor<1x64xf32>
+    %5395 = "ttir.relu"(%5393, %5394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5396 = tensor.empty() : tensor<1x64xf32>
+    %5397 = "ttir.relu"(%5395, %5396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5398 = tensor.empty() : tensor<1x64xf32>
+    %5399 = "ttir.relu"(%5397, %5398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5400 = tensor.empty() : tensor<1x64xf32>
+    %5401 = "ttir.relu"(%5399, %5400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5402 = tensor.empty() : tensor<1x64xf32>
+    %5403 = "ttir.relu"(%5401, %5402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5404 = tensor.empty() : tensor<1x64xf32>
+    %5405 = "ttir.relu"(%5403, %5404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5406 = tensor.empty() : tensor<1x64xf32>
+    %5407 = "ttir.relu"(%5405, %5406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5408 = tensor.empty() : tensor<1x64xf32>
+    %5409 = "ttir.relu"(%5407, %5408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5410 = tensor.empty() : tensor<1x64xf32>
+    %5411 = "ttir.relu"(%5409, %5410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5412 = tensor.empty() : tensor<1x64xf32>
+    %5413 = "ttir.relu"(%5411, %5412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5414 = tensor.empty() : tensor<1x64xf32>
+    %5415 = "ttir.relu"(%5413, %5414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5416 = tensor.empty() : tensor<1x64xf32>
+    %5417 = "ttir.relu"(%5415, %5416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5418 = tensor.empty() : tensor<1x64xf32>
+    %5419 = "ttir.relu"(%5417, %5418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5420 = tensor.empty() : tensor<1x64xf32>
+    %5421 = "ttir.relu"(%5419, %5420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5422 = tensor.empty() : tensor<1x64xf32>
+    %5423 = "ttir.relu"(%5421, %5422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5424 = tensor.empty() : tensor<1x64xf32>
+    %5425 = "ttir.relu"(%5423, %5424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5426 = tensor.empty() : tensor<1x64xf32>
+    %5427 = "ttir.relu"(%5425, %5426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5428 = tensor.empty() : tensor<1x64xf32>
+    %5429 = "ttir.relu"(%5427, %5428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5430 = tensor.empty() : tensor<1x64xf32>
+    %5431 = "ttir.relu"(%5429, %5430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5432 = tensor.empty() : tensor<1x64xf32>
+    %5433 = "ttir.relu"(%5431, %5432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5434 = tensor.empty() : tensor<1x64xf32>
+    %5435 = "ttir.relu"(%5433, %5434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5436 = tensor.empty() : tensor<1x64xf32>
+    %5437 = "ttir.relu"(%5435, %5436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5438 = tensor.empty() : tensor<1x64xf32>
+    %5439 = "ttir.relu"(%5437, %5438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5440 = tensor.empty() : tensor<1x64xf32>
+    %5441 = "ttir.relu"(%5439, %5440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5442 = tensor.empty() : tensor<1x64xf32>
+    %5443 = "ttir.relu"(%5441, %5442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5444 = tensor.empty() : tensor<1x64xf32>
+    %5445 = "ttir.relu"(%5443, %5444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5446 = tensor.empty() : tensor<1x64xf32>
+    %5447 = "ttir.relu"(%5445, %5446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5448 = tensor.empty() : tensor<1x64xf32>
+    %5449 = "ttir.relu"(%5447, %5448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5450 = tensor.empty() : tensor<1x64xf32>
+    %5451 = "ttir.relu"(%5449, %5450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5452 = tensor.empty() : tensor<1x64xf32>
+    %5453 = "ttir.relu"(%5451, %5452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5454 = tensor.empty() : tensor<1x64xf32>
+    %5455 = "ttir.relu"(%5453, %5454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5456 = tensor.empty() : tensor<1x64xf32>
+    %5457 = "ttir.relu"(%5455, %5456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5458 = tensor.empty() : tensor<1x64xf32>
+    %5459 = "ttir.relu"(%5457, %5458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5460 = tensor.empty() : tensor<1x64xf32>
+    %5461 = "ttir.relu"(%5459, %5460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5462 = tensor.empty() : tensor<1x64xf32>
+    %5463 = "ttir.relu"(%5461, %5462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5464 = tensor.empty() : tensor<1x64xf32>
+    %5465 = "ttir.relu"(%5463, %5464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5466 = tensor.empty() : tensor<1x64xf32>
+    %5467 = "ttir.relu"(%5465, %5466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5468 = tensor.empty() : tensor<1x64xf32>
+    %5469 = "ttir.relu"(%5467, %5468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5470 = tensor.empty() : tensor<1x64xf32>
+    %5471 = "ttir.relu"(%5469, %5470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5472 = tensor.empty() : tensor<1x64xf32>
+    %5473 = "ttir.relu"(%5471, %5472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5474 = tensor.empty() : tensor<1x64xf32>
+    %5475 = "ttir.relu"(%5473, %5474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5476 = tensor.empty() : tensor<1x64xf32>
+    %5477 = "ttir.relu"(%5475, %5476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5478 = tensor.empty() : tensor<1x64xf32>
+    %5479 = "ttir.relu"(%5477, %5478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5480 = tensor.empty() : tensor<1x64xf32>
+    %5481 = "ttir.relu"(%5479, %5480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5482 = tensor.empty() : tensor<1x64xf32>
+    %5483 = "ttir.relu"(%5481, %5482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5484 = tensor.empty() : tensor<1x64xf32>
+    %5485 = "ttir.relu"(%5483, %5484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5486 = tensor.empty() : tensor<1x64xf32>
+    %5487 = "ttir.relu"(%5485, %5486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5488 = tensor.empty() : tensor<1x64xf32>
+    %5489 = "ttir.relu"(%5487, %5488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5490 = tensor.empty() : tensor<1x64xf32>
+    %5491 = "ttir.relu"(%5489, %5490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5492 = tensor.empty() : tensor<1x64xf32>
+    %5493 = "ttir.relu"(%5491, %5492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5494 = tensor.empty() : tensor<1x64xf32>
+    %5495 = "ttir.relu"(%5493, %5494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5496 = tensor.empty() : tensor<1x64xf32>
+    %5497 = "ttir.relu"(%5495, %5496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5498 = tensor.empty() : tensor<1x64xf32>
+    %5499 = "ttir.relu"(%5497, %5498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5500 = tensor.empty() : tensor<1x64xf32>
+    %5501 = "ttir.relu"(%5499, %5500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5502 = tensor.empty() : tensor<1x64xf32>
+    %5503 = "ttir.relu"(%5501, %5502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5504 = tensor.empty() : tensor<1x64xf32>
+    %5505 = "ttir.relu"(%5503, %5504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5506 = tensor.empty() : tensor<1x64xf32>
+    %5507 = "ttir.relu"(%5505, %5506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5508 = tensor.empty() : tensor<1x64xf32>
+    %5509 = "ttir.relu"(%5507, %5508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5510 = tensor.empty() : tensor<1x64xf32>
+    %5511 = "ttir.relu"(%5509, %5510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5512 = tensor.empty() : tensor<1x64xf32>
+    %5513 = "ttir.relu"(%5511, %5512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5514 = tensor.empty() : tensor<1x64xf32>
+    %5515 = "ttir.relu"(%5513, %5514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5516 = tensor.empty() : tensor<1x64xf32>
+    %5517 = "ttir.relu"(%5515, %5516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5518 = tensor.empty() : tensor<1x64xf32>
+    %5519 = "ttir.relu"(%5517, %5518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5520 = tensor.empty() : tensor<1x64xf32>
+    %5521 = "ttir.relu"(%5519, %5520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5522 = tensor.empty() : tensor<1x64xf32>
+    %5523 = "ttir.relu"(%5521, %5522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5524 = tensor.empty() : tensor<1x64xf32>
+    %5525 = "ttir.relu"(%5523, %5524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5526 = tensor.empty() : tensor<1x64xf32>
+    %5527 = "ttir.relu"(%5525, %5526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5528 = tensor.empty() : tensor<1x64xf32>
+    %5529 = "ttir.relu"(%5527, %5528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5530 = tensor.empty() : tensor<1x64xf32>
+    %5531 = "ttir.relu"(%5529, %5530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5532 = tensor.empty() : tensor<1x64xf32>
+    %5533 = "ttir.relu"(%5531, %5532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5534 = tensor.empty() : tensor<1x64xf32>
+    %5535 = "ttir.relu"(%5533, %5534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5536 = tensor.empty() : tensor<1x64xf32>
+    %5537 = "ttir.relu"(%5535, %5536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5538 = tensor.empty() : tensor<1x64xf32>
+    %5539 = "ttir.relu"(%5537, %5538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5540 = tensor.empty() : tensor<1x64xf32>
+    %5541 = "ttir.relu"(%5539, %5540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5542 = tensor.empty() : tensor<1x64xf32>
+    %5543 = "ttir.relu"(%5541, %5542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5544 = tensor.empty() : tensor<1x64xf32>
+    %5545 = "ttir.relu"(%5543, %5544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5546 = tensor.empty() : tensor<1x64xf32>
+    %5547 = "ttir.relu"(%5545, %5546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5548 = tensor.empty() : tensor<1x64xf32>
+    %5549 = "ttir.relu"(%5547, %5548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5550 = tensor.empty() : tensor<1x64xf32>
+    %5551 = "ttir.relu"(%5549, %5550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5552 = tensor.empty() : tensor<1x64xf32>
+    %5553 = "ttir.relu"(%5551, %5552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5554 = tensor.empty() : tensor<1x64xf32>
+    %5555 = "ttir.relu"(%5553, %5554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5556 = tensor.empty() : tensor<1x64xf32>
+    %5557 = "ttir.relu"(%5555, %5556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5558 = tensor.empty() : tensor<1x64xf32>
+    %5559 = "ttir.relu"(%5557, %5558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5560 = tensor.empty() : tensor<1x64xf32>
+    %5561 = "ttir.relu"(%5559, %5560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5562 = tensor.empty() : tensor<1x64xf32>
+    %5563 = "ttir.relu"(%5561, %5562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5564 = tensor.empty() : tensor<1x64xf32>
+    %5565 = "ttir.relu"(%5563, %5564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5566 = tensor.empty() : tensor<1x64xf32>
+    %5567 = "ttir.relu"(%5565, %5566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5568 = tensor.empty() : tensor<1x64xf32>
+    %5569 = "ttir.relu"(%5567, %5568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5570 = tensor.empty() : tensor<1x64xf32>
+    %5571 = "ttir.relu"(%5569, %5570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5572 = tensor.empty() : tensor<1x64xf32>
+    %5573 = "ttir.relu"(%5571, %5572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5574 = tensor.empty() : tensor<1x64xf32>
+    %5575 = "ttir.relu"(%5573, %5574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5576 = tensor.empty() : tensor<1x64xf32>
+    %5577 = "ttir.relu"(%5575, %5576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5578 = tensor.empty() : tensor<1x64xf32>
+    %5579 = "ttir.relu"(%5577, %5578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5580 = tensor.empty() : tensor<1x64xf32>
+    %5581 = "ttir.relu"(%5579, %5580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5582 = tensor.empty() : tensor<1x64xf32>
+    %5583 = "ttir.relu"(%5581, %5582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5584 = tensor.empty() : tensor<1x64xf32>
+    %5585 = "ttir.relu"(%5583, %5584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5586 = tensor.empty() : tensor<1x64xf32>
+    %5587 = "ttir.relu"(%5585, %5586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5588 = tensor.empty() : tensor<1x64xf32>
+    %5589 = "ttir.relu"(%5587, %5588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5590 = tensor.empty() : tensor<1x64xf32>
+    %5591 = "ttir.relu"(%5589, %5590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5592 = tensor.empty() : tensor<1x64xf32>
+    %5593 = "ttir.relu"(%5591, %5592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5594 = tensor.empty() : tensor<1x64xf32>
+    %5595 = "ttir.relu"(%5593, %5594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5596 = tensor.empty() : tensor<1x64xf32>
+    %5597 = "ttir.relu"(%5595, %5596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5598 = tensor.empty() : tensor<1x64xf32>
+    %5599 = "ttir.relu"(%5597, %5598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5600 = tensor.empty() : tensor<1x64xf32>
+    %5601 = "ttir.relu"(%5599, %5600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5602 = tensor.empty() : tensor<1x64xf32>
+    %5603 = "ttir.relu"(%5601, %5602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5604 = tensor.empty() : tensor<1x64xf32>
+    %5605 = "ttir.relu"(%5603, %5604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5606 = tensor.empty() : tensor<1x64xf32>
+    %5607 = "ttir.relu"(%5605, %5606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5608 = tensor.empty() : tensor<1x64xf32>
+    %5609 = "ttir.relu"(%5607, %5608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5610 = tensor.empty() : tensor<1x64xf32>
+    %5611 = "ttir.relu"(%5609, %5610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5612 = tensor.empty() : tensor<1x64xf32>
+    %5613 = "ttir.relu"(%5611, %5612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5614 = tensor.empty() : tensor<1x64xf32>
+    %5615 = "ttir.relu"(%5613, %5614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5616 = tensor.empty() : tensor<1x64xf32>
+    %5617 = "ttir.relu"(%5615, %5616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5618 = tensor.empty() : tensor<1x64xf32>
+    %5619 = "ttir.relu"(%5617, %5618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5620 = tensor.empty() : tensor<1x64xf32>
+    %5621 = "ttir.relu"(%5619, %5620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5622 = tensor.empty() : tensor<1x64xf32>
+    %5623 = "ttir.relu"(%5621, %5622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5624 = tensor.empty() : tensor<1x64xf32>
+    %5625 = "ttir.relu"(%5623, %5624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5626 = tensor.empty() : tensor<1x64xf32>
+    %5627 = "ttir.relu"(%5625, %5626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5628 = tensor.empty() : tensor<1x64xf32>
+    %5629 = "ttir.relu"(%5627, %5628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5630 = tensor.empty() : tensor<1x64xf32>
+    %5631 = "ttir.relu"(%5629, %5630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5632 = tensor.empty() : tensor<1x64xf32>
+    %5633 = "ttir.relu"(%5631, %5632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5634 = tensor.empty() : tensor<1x64xf32>
+    %5635 = "ttir.relu"(%5633, %5634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5636 = tensor.empty() : tensor<1x64xf32>
+    %5637 = "ttir.relu"(%5635, %5636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5638 = tensor.empty() : tensor<1x64xf32>
+    %5639 = "ttir.relu"(%5637, %5638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5640 = tensor.empty() : tensor<1x64xf32>
+    %5641 = "ttir.relu"(%5639, %5640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5642 = tensor.empty() : tensor<1x64xf32>
+    %5643 = "ttir.relu"(%5641, %5642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5644 = tensor.empty() : tensor<1x64xf32>
+    %5645 = "ttir.relu"(%5643, %5644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5646 = tensor.empty() : tensor<1x64xf32>
+    %5647 = "ttir.relu"(%5645, %5646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5648 = tensor.empty() : tensor<1x64xf32>
+    %5649 = "ttir.relu"(%5647, %5648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5650 = tensor.empty() : tensor<1x64xf32>
+    %5651 = "ttir.relu"(%5649, %5650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5652 = tensor.empty() : tensor<1x64xf32>
+    %5653 = "ttir.relu"(%5651, %5652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5654 = tensor.empty() : tensor<1x64xf32>
+    %5655 = "ttir.relu"(%5653, %5654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5656 = tensor.empty() : tensor<1x64xf32>
+    %5657 = "ttir.relu"(%5655, %5656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5658 = tensor.empty() : tensor<1x64xf32>
+    %5659 = "ttir.relu"(%5657, %5658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5660 = tensor.empty() : tensor<1x64xf32>
+    %5661 = "ttir.relu"(%5659, %5660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5662 = tensor.empty() : tensor<1x64xf32>
+    %5663 = "ttir.relu"(%5661, %5662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5664 = tensor.empty() : tensor<1x64xf32>
+    %5665 = "ttir.relu"(%5663, %5664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5666 = tensor.empty() : tensor<1x64xf32>
+    %5667 = "ttir.relu"(%5665, %5666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5668 = tensor.empty() : tensor<1x64xf32>
+    %5669 = "ttir.relu"(%5667, %5668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5670 = tensor.empty() : tensor<1x64xf32>
+    %5671 = "ttir.relu"(%5669, %5670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5672 = tensor.empty() : tensor<1x64xf32>
+    %5673 = "ttir.relu"(%5671, %5672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5674 = tensor.empty() : tensor<1x64xf32>
+    %5675 = "ttir.relu"(%5673, %5674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5676 = tensor.empty() : tensor<1x64xf32>
+    %5677 = "ttir.relu"(%5675, %5676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5678 = tensor.empty() : tensor<1x64xf32>
+    %5679 = "ttir.relu"(%5677, %5678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5680 = tensor.empty() : tensor<1x64xf32>
+    %5681 = "ttir.relu"(%5679, %5680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5682 = tensor.empty() : tensor<1x64xf32>
+    %5683 = "ttir.relu"(%5681, %5682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5684 = tensor.empty() : tensor<1x64xf32>
+    %5685 = "ttir.relu"(%5683, %5684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5686 = tensor.empty() : tensor<1x64xf32>
+    %5687 = "ttir.relu"(%5685, %5686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5688 = tensor.empty() : tensor<1x64xf32>
+    %5689 = "ttir.relu"(%5687, %5688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5690 = tensor.empty() : tensor<1x64xf32>
+    %5691 = "ttir.relu"(%5689, %5690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5692 = tensor.empty() : tensor<1x64xf32>
+    %5693 = "ttir.relu"(%5691, %5692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5694 = tensor.empty() : tensor<1x64xf32>
+    %5695 = "ttir.relu"(%5693, %5694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5696 = tensor.empty() : tensor<1x64xf32>
+    %5697 = "ttir.relu"(%5695, %5696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5698 = tensor.empty() : tensor<1x64xf32>
+    %5699 = "ttir.relu"(%5697, %5698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5700 = tensor.empty() : tensor<1x64xf32>
+    %5701 = "ttir.relu"(%5699, %5700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5702 = tensor.empty() : tensor<1x64xf32>
+    %5703 = "ttir.relu"(%5701, %5702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5704 = tensor.empty() : tensor<1x64xf32>
+    %5705 = "ttir.relu"(%5703, %5704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5706 = tensor.empty() : tensor<1x64xf32>
+    %5707 = "ttir.relu"(%5705, %5706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5708 = tensor.empty() : tensor<1x64xf32>
+    %5709 = "ttir.relu"(%5707, %5708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5710 = tensor.empty() : tensor<1x64xf32>
+    %5711 = "ttir.relu"(%5709, %5710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5712 = tensor.empty() : tensor<1x64xf32>
+    %5713 = "ttir.relu"(%5711, %5712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5714 = tensor.empty() : tensor<1x64xf32>
+    %5715 = "ttir.relu"(%5713, %5714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5716 = tensor.empty() : tensor<1x64xf32>
+    %5717 = "ttir.relu"(%5715, %5716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5718 = tensor.empty() : tensor<1x64xf32>
+    %5719 = "ttir.relu"(%5717, %5718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5720 = tensor.empty() : tensor<1x64xf32>
+    %5721 = "ttir.relu"(%5719, %5720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5722 = tensor.empty() : tensor<1x64xf32>
+    %5723 = "ttir.relu"(%5721, %5722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5724 = tensor.empty() : tensor<1x64xf32>
+    %5725 = "ttir.relu"(%5723, %5724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5726 = tensor.empty() : tensor<1x64xf32>
+    %5727 = "ttir.relu"(%5725, %5726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5728 = tensor.empty() : tensor<1x64xf32>
+    %5729 = "ttir.relu"(%5727, %5728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5730 = tensor.empty() : tensor<1x64xf32>
+    %5731 = "ttir.relu"(%5729, %5730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5732 = tensor.empty() : tensor<1x64xf32>
+    %5733 = "ttir.relu"(%5731, %5732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5734 = tensor.empty() : tensor<1x64xf32>
+    %5735 = "ttir.relu"(%5733, %5734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5736 = tensor.empty() : tensor<1x64xf32>
+    %5737 = "ttir.relu"(%5735, %5736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5738 = tensor.empty() : tensor<1x64xf32>
+    %5739 = "ttir.relu"(%5737, %5738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5740 = tensor.empty() : tensor<1x64xf32>
+    %5741 = "ttir.relu"(%5739, %5740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5742 = tensor.empty() : tensor<1x64xf32>
+    %5743 = "ttir.relu"(%5741, %5742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5744 = tensor.empty() : tensor<1x64xf32>
+    %5745 = "ttir.relu"(%5743, %5744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5746 = tensor.empty() : tensor<1x64xf32>
+    %5747 = "ttir.relu"(%5745, %5746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5748 = tensor.empty() : tensor<1x64xf32>
+    %5749 = "ttir.relu"(%5747, %5748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5750 = tensor.empty() : tensor<1x64xf32>
+    %5751 = "ttir.relu"(%5749, %5750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5752 = tensor.empty() : tensor<1x64xf32>
+    %5753 = "ttir.relu"(%5751, %5752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5754 = tensor.empty() : tensor<1x64xf32>
+    %5755 = "ttir.relu"(%5753, %5754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5756 = tensor.empty() : tensor<1x64xf32>
+    %5757 = "ttir.relu"(%5755, %5756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5758 = tensor.empty() : tensor<1x64xf32>
+    %5759 = "ttir.relu"(%5757, %5758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5760 = tensor.empty() : tensor<1x64xf32>
+    %5761 = "ttir.relu"(%5759, %5760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5762 = tensor.empty() : tensor<1x64xf32>
+    %5763 = "ttir.relu"(%5761, %5762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5764 = tensor.empty() : tensor<1x64xf32>
+    %5765 = "ttir.relu"(%5763, %5764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5766 = tensor.empty() : tensor<1x64xf32>
+    %5767 = "ttir.relu"(%5765, %5766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5768 = tensor.empty() : tensor<1x64xf32>
+    %5769 = "ttir.relu"(%5767, %5768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5770 = tensor.empty() : tensor<1x64xf32>
+    %5771 = "ttir.relu"(%5769, %5770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5772 = tensor.empty() : tensor<1x64xf32>
+    %5773 = "ttir.relu"(%5771, %5772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5774 = tensor.empty() : tensor<1x64xf32>
+    %5775 = "ttir.relu"(%5773, %5774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5776 = tensor.empty() : tensor<1x64xf32>
+    %5777 = "ttir.relu"(%5775, %5776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5778 = tensor.empty() : tensor<1x64xf32>
+    %5779 = "ttir.relu"(%5777, %5778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5780 = tensor.empty() : tensor<1x64xf32>
+    %5781 = "ttir.relu"(%5779, %5780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5782 = tensor.empty() : tensor<1x64xf32>
+    %5783 = "ttir.relu"(%5781, %5782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5784 = tensor.empty() : tensor<1x64xf32>
+    %5785 = "ttir.relu"(%5783, %5784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5786 = tensor.empty() : tensor<1x64xf32>
+    %5787 = "ttir.relu"(%5785, %5786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5788 = tensor.empty() : tensor<1x64xf32>
+    %5789 = "ttir.relu"(%5787, %5788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5790 = tensor.empty() : tensor<1x64xf32>
+    %5791 = "ttir.relu"(%5789, %5790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5792 = tensor.empty() : tensor<1x64xf32>
+    %5793 = "ttir.relu"(%5791, %5792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5794 = tensor.empty() : tensor<1x64xf32>
+    %5795 = "ttir.relu"(%5793, %5794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5796 = tensor.empty() : tensor<1x64xf32>
+    %5797 = "ttir.relu"(%5795, %5796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5798 = tensor.empty() : tensor<1x64xf32>
+    %5799 = "ttir.relu"(%5797, %5798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5800 = tensor.empty() : tensor<1x64xf32>
+    %5801 = "ttir.relu"(%5799, %5800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5802 = tensor.empty() : tensor<1x64xf32>
+    %5803 = "ttir.relu"(%5801, %5802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5804 = tensor.empty() : tensor<1x64xf32>
+    %5805 = "ttir.relu"(%5803, %5804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5806 = tensor.empty() : tensor<1x64xf32>
+    %5807 = "ttir.relu"(%5805, %5806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5808 = tensor.empty() : tensor<1x64xf32>
+    %5809 = "ttir.relu"(%5807, %5808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5810 = tensor.empty() : tensor<1x64xf32>
+    %5811 = "ttir.relu"(%5809, %5810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5812 = tensor.empty() : tensor<1x64xf32>
+    %5813 = "ttir.relu"(%5811, %5812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5814 = tensor.empty() : tensor<1x64xf32>
+    %5815 = "ttir.relu"(%5813, %5814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5816 = tensor.empty() : tensor<1x64xf32>
+    %5817 = "ttir.relu"(%5815, %5816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5818 = tensor.empty() : tensor<1x64xf32>
+    %5819 = "ttir.relu"(%5817, %5818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5820 = tensor.empty() : tensor<1x64xf32>
+    %5821 = "ttir.relu"(%5819, %5820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5822 = tensor.empty() : tensor<1x64xf32>
+    %5823 = "ttir.relu"(%5821, %5822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5824 = tensor.empty() : tensor<1x64xf32>
+    %5825 = "ttir.relu"(%5823, %5824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5826 = tensor.empty() : tensor<1x64xf32>
+    %5827 = "ttir.relu"(%5825, %5826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5828 = tensor.empty() : tensor<1x64xf32>
+    %5829 = "ttir.relu"(%5827, %5828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5830 = tensor.empty() : tensor<1x64xf32>
+    %5831 = "ttir.relu"(%5829, %5830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5832 = tensor.empty() : tensor<1x64xf32>
+    %5833 = "ttir.relu"(%5831, %5832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5834 = tensor.empty() : tensor<1x64xf32>
+    %5835 = "ttir.relu"(%5833, %5834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5836 = tensor.empty() : tensor<1x64xf32>
+    %5837 = "ttir.relu"(%5835, %5836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5838 = tensor.empty() : tensor<1x64xf32>
+    %5839 = "ttir.relu"(%5837, %5838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5840 = tensor.empty() : tensor<1x64xf32>
+    %5841 = "ttir.relu"(%5839, %5840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5842 = tensor.empty() : tensor<1x64xf32>
+    %5843 = "ttir.relu"(%5841, %5842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5844 = tensor.empty() : tensor<1x64xf32>
+    %5845 = "ttir.relu"(%5843, %5844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5846 = tensor.empty() : tensor<1x64xf32>
+    %5847 = "ttir.relu"(%5845, %5846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5848 = tensor.empty() : tensor<1x64xf32>
+    %5849 = "ttir.relu"(%5847, %5848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5850 = tensor.empty() : tensor<1x64xf32>
+    %5851 = "ttir.relu"(%5849, %5850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5852 = tensor.empty() : tensor<1x64xf32>
+    %5853 = "ttir.relu"(%5851, %5852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5854 = tensor.empty() : tensor<1x64xf32>
+    %5855 = "ttir.relu"(%5853, %5854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5856 = tensor.empty() : tensor<1x64xf32>
+    %5857 = "ttir.relu"(%5855, %5856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5858 = tensor.empty() : tensor<1x64xf32>
+    %5859 = "ttir.relu"(%5857, %5858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5860 = tensor.empty() : tensor<1x64xf32>
+    %5861 = "ttir.relu"(%5859, %5860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5862 = tensor.empty() : tensor<1x64xf32>
+    %5863 = "ttir.relu"(%5861, %5862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5864 = tensor.empty() : tensor<1x64xf32>
+    %5865 = "ttir.relu"(%5863, %5864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5866 = tensor.empty() : tensor<1x64xf32>
+    %5867 = "ttir.relu"(%5865, %5866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5868 = tensor.empty() : tensor<1x64xf32>
+    %5869 = "ttir.relu"(%5867, %5868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5870 = tensor.empty() : tensor<1x64xf32>
+    %5871 = "ttir.relu"(%5869, %5870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5872 = tensor.empty() : tensor<1x64xf32>
+    %5873 = "ttir.relu"(%5871, %5872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5874 = tensor.empty() : tensor<1x64xf32>
+    %5875 = "ttir.relu"(%5873, %5874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5876 = tensor.empty() : tensor<1x64xf32>
+    %5877 = "ttir.relu"(%5875, %5876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5878 = tensor.empty() : tensor<1x64xf32>
+    %5879 = "ttir.relu"(%5877, %5878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5880 = tensor.empty() : tensor<1x64xf32>
+    %5881 = "ttir.relu"(%5879, %5880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5882 = tensor.empty() : tensor<1x64xf32>
+    %5883 = "ttir.relu"(%5881, %5882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5884 = tensor.empty() : tensor<1x64xf32>
+    %5885 = "ttir.relu"(%5883, %5884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5886 = tensor.empty() : tensor<1x64xf32>
+    %5887 = "ttir.relu"(%5885, %5886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5888 = tensor.empty() : tensor<1x64xf32>
+    %5889 = "ttir.relu"(%5887, %5888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5890 = tensor.empty() : tensor<1x64xf32>
+    %5891 = "ttir.relu"(%5889, %5890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5892 = tensor.empty() : tensor<1x64xf32>
+    %5893 = "ttir.relu"(%5891, %5892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5894 = tensor.empty() : tensor<1x64xf32>
+    %5895 = "ttir.relu"(%5893, %5894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5896 = tensor.empty() : tensor<1x64xf32>
+    %5897 = "ttir.relu"(%5895, %5896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5898 = tensor.empty() : tensor<1x64xf32>
+    %5899 = "ttir.relu"(%5897, %5898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5900 = tensor.empty() : tensor<1x64xf32>
+    %5901 = "ttir.relu"(%5899, %5900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5902 = tensor.empty() : tensor<1x64xf32>
+    %5903 = "ttir.relu"(%5901, %5902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5904 = tensor.empty() : tensor<1x64xf32>
+    %5905 = "ttir.relu"(%5903, %5904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5906 = tensor.empty() : tensor<1x64xf32>
+    %5907 = "ttir.relu"(%5905, %5906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5908 = tensor.empty() : tensor<1x64xf32>
+    %5909 = "ttir.relu"(%5907, %5908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5910 = tensor.empty() : tensor<1x64xf32>
+    %5911 = "ttir.relu"(%5909, %5910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5912 = tensor.empty() : tensor<1x64xf32>
+    %5913 = "ttir.relu"(%5911, %5912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5914 = tensor.empty() : tensor<1x64xf32>
+    %5915 = "ttir.relu"(%5913, %5914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5916 = tensor.empty() : tensor<1x64xf32>
+    %5917 = "ttir.relu"(%5915, %5916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5918 = tensor.empty() : tensor<1x64xf32>
+    %5919 = "ttir.relu"(%5917, %5918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5920 = tensor.empty() : tensor<1x64xf32>
+    %5921 = "ttir.relu"(%5919, %5920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5922 = tensor.empty() : tensor<1x64xf32>
+    %5923 = "ttir.relu"(%5921, %5922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5924 = tensor.empty() : tensor<1x64xf32>
+    %5925 = "ttir.relu"(%5923, %5924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5926 = tensor.empty() : tensor<1x64xf32>
+    %5927 = "ttir.relu"(%5925, %5926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5928 = tensor.empty() : tensor<1x64xf32>
+    %5929 = "ttir.relu"(%5927, %5928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5930 = tensor.empty() : tensor<1x64xf32>
+    %5931 = "ttir.relu"(%5929, %5930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5932 = tensor.empty() : tensor<1x64xf32>
+    %5933 = "ttir.relu"(%5931, %5932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5934 = tensor.empty() : tensor<1x64xf32>
+    %5935 = "ttir.relu"(%5933, %5934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5936 = tensor.empty() : tensor<1x64xf32>
+    %5937 = "ttir.relu"(%5935, %5936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5938 = tensor.empty() : tensor<1x64xf32>
+    %5939 = "ttir.relu"(%5937, %5938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5940 = tensor.empty() : tensor<1x64xf32>
+    %5941 = "ttir.relu"(%5939, %5940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5942 = tensor.empty() : tensor<1x64xf32>
+    %5943 = "ttir.relu"(%5941, %5942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5944 = tensor.empty() : tensor<1x64xf32>
+    %5945 = "ttir.relu"(%5943, %5944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5946 = tensor.empty() : tensor<1x64xf32>
+    %5947 = "ttir.relu"(%5945, %5946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5948 = tensor.empty() : tensor<1x64xf32>
+    %5949 = "ttir.relu"(%5947, %5948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5950 = tensor.empty() : tensor<1x64xf32>
+    %5951 = "ttir.relu"(%5949, %5950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5952 = tensor.empty() : tensor<1x64xf32>
+    %5953 = "ttir.relu"(%5951, %5952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5954 = tensor.empty() : tensor<1x64xf32>
+    %5955 = "ttir.relu"(%5953, %5954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5956 = tensor.empty() : tensor<1x64xf32>
+    %5957 = "ttir.relu"(%5955, %5956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5958 = tensor.empty() : tensor<1x64xf32>
+    %5959 = "ttir.relu"(%5957, %5958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5960 = tensor.empty() : tensor<1x64xf32>
+    %5961 = "ttir.relu"(%5959, %5960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5962 = tensor.empty() : tensor<1x64xf32>
+    %5963 = "ttir.relu"(%5961, %5962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5964 = tensor.empty() : tensor<1x64xf32>
+    %5965 = "ttir.relu"(%5963, %5964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5966 = tensor.empty() : tensor<1x64xf32>
+    %5967 = "ttir.relu"(%5965, %5966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5968 = tensor.empty() : tensor<1x64xf32>
+    %5969 = "ttir.relu"(%5967, %5968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5970 = tensor.empty() : tensor<1x64xf32>
+    %5971 = "ttir.relu"(%5969, %5970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5972 = tensor.empty() : tensor<1x64xf32>
+    %5973 = "ttir.relu"(%5971, %5972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5974 = tensor.empty() : tensor<1x64xf32>
+    %5975 = "ttir.relu"(%5973, %5974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5976 = tensor.empty() : tensor<1x64xf32>
+    %5977 = "ttir.relu"(%5975, %5976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5978 = tensor.empty() : tensor<1x64xf32>
+    %5979 = "ttir.relu"(%5977, %5978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5980 = tensor.empty() : tensor<1x64xf32>
+    %5981 = "ttir.relu"(%5979, %5980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5982 = tensor.empty() : tensor<1x64xf32>
+    %5983 = "ttir.relu"(%5981, %5982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5984 = tensor.empty() : tensor<1x64xf32>
+    %5985 = "ttir.relu"(%5983, %5984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5986 = tensor.empty() : tensor<1x64xf32>
+    %5987 = "ttir.relu"(%5985, %5986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5988 = tensor.empty() : tensor<1x64xf32>
+    %5989 = "ttir.relu"(%5987, %5988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5990 = tensor.empty() : tensor<1x64xf32>
+    %5991 = "ttir.relu"(%5989, %5990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5992 = tensor.empty() : tensor<1x64xf32>
+    %5993 = "ttir.relu"(%5991, %5992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5994 = tensor.empty() : tensor<1x64xf32>
+    %5995 = "ttir.relu"(%5993, %5994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5996 = tensor.empty() : tensor<1x64xf32>
+    %5997 = "ttir.relu"(%5995, %5996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %5998 = tensor.empty() : tensor<1x64xf32>
+    %5999 = "ttir.relu"(%5997, %5998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6000 = tensor.empty() : tensor<1x64xf32>
+    %6001 = "ttir.relu"(%5999, %6000) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6002 = tensor.empty() : tensor<1x64xf32>
+    %6003 = "ttir.relu"(%6001, %6002) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6004 = tensor.empty() : tensor<1x64xf32>
+    %6005 = "ttir.relu"(%6003, %6004) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6006 = tensor.empty() : tensor<1x64xf32>
+    %6007 = "ttir.relu"(%6005, %6006) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6008 = tensor.empty() : tensor<1x64xf32>
+    %6009 = "ttir.relu"(%6007, %6008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6010 = tensor.empty() : tensor<1x64xf32>
+    %6011 = "ttir.relu"(%6009, %6010) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6012 = tensor.empty() : tensor<1x64xf32>
+    %6013 = "ttir.relu"(%6011, %6012) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6014 = tensor.empty() : tensor<1x64xf32>
+    %6015 = "ttir.relu"(%6013, %6014) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6016 = tensor.empty() : tensor<1x64xf32>
+    %6017 = "ttir.relu"(%6015, %6016) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6018 = tensor.empty() : tensor<1x64xf32>
+    %6019 = "ttir.relu"(%6017, %6018) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6020 = tensor.empty() : tensor<1x64xf32>
+    %6021 = "ttir.relu"(%6019, %6020) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6022 = tensor.empty() : tensor<1x64xf32>
+    %6023 = "ttir.relu"(%6021, %6022) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6024 = tensor.empty() : tensor<1x64xf32>
+    %6025 = "ttir.relu"(%6023, %6024) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6026 = tensor.empty() : tensor<1x64xf32>
+    %6027 = "ttir.relu"(%6025, %6026) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6028 = tensor.empty() : tensor<1x64xf32>
+    %6029 = "ttir.relu"(%6027, %6028) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6030 = tensor.empty() : tensor<1x64xf32>
+    %6031 = "ttir.relu"(%6029, %6030) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6032 = tensor.empty() : tensor<1x64xf32>
+    %6033 = "ttir.relu"(%6031, %6032) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6034 = tensor.empty() : tensor<1x64xf32>
+    %6035 = "ttir.relu"(%6033, %6034) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6036 = tensor.empty() : tensor<1x64xf32>
+    %6037 = "ttir.relu"(%6035, %6036) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6038 = tensor.empty() : tensor<1x64xf32>
+    %6039 = "ttir.relu"(%6037, %6038) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6040 = tensor.empty() : tensor<1x64xf32>
+    %6041 = "ttir.relu"(%6039, %6040) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6042 = tensor.empty() : tensor<1x64xf32>
+    %6043 = "ttir.relu"(%6041, %6042) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6044 = tensor.empty() : tensor<1x64xf32>
+    %6045 = "ttir.relu"(%6043, %6044) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6046 = tensor.empty() : tensor<1x64xf32>
+    %6047 = "ttir.relu"(%6045, %6046) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6048 = tensor.empty() : tensor<1x64xf32>
+    %6049 = "ttir.relu"(%6047, %6048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6050 = tensor.empty() : tensor<1x64xf32>
+    %6051 = "ttir.relu"(%6049, %6050) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6052 = tensor.empty() : tensor<1x64xf32>
+    %6053 = "ttir.relu"(%6051, %6052) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6054 = tensor.empty() : tensor<1x64xf32>
+    %6055 = "ttir.relu"(%6053, %6054) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6056 = tensor.empty() : tensor<1x64xf32>
+    %6057 = "ttir.relu"(%6055, %6056) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6058 = tensor.empty() : tensor<1x64xf32>
+    %6059 = "ttir.relu"(%6057, %6058) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6060 = tensor.empty() : tensor<1x64xf32>
+    %6061 = "ttir.relu"(%6059, %6060) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6062 = tensor.empty() : tensor<1x64xf32>
+    %6063 = "ttir.relu"(%6061, %6062) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6064 = tensor.empty() : tensor<1x64xf32>
+    %6065 = "ttir.relu"(%6063, %6064) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6066 = tensor.empty() : tensor<1x64xf32>
+    %6067 = "ttir.relu"(%6065, %6066) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6068 = tensor.empty() : tensor<1x64xf32>
+    %6069 = "ttir.relu"(%6067, %6068) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6070 = tensor.empty() : tensor<1x64xf32>
+    %6071 = "ttir.relu"(%6069, %6070) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6072 = tensor.empty() : tensor<1x64xf32>
+    %6073 = "ttir.relu"(%6071, %6072) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6074 = tensor.empty() : tensor<1x64xf32>
+    %6075 = "ttir.relu"(%6073, %6074) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6076 = tensor.empty() : tensor<1x64xf32>
+    %6077 = "ttir.relu"(%6075, %6076) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6078 = tensor.empty() : tensor<1x64xf32>
+    %6079 = "ttir.relu"(%6077, %6078) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6080 = tensor.empty() : tensor<1x64xf32>
+    %6081 = "ttir.relu"(%6079, %6080) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6082 = tensor.empty() : tensor<1x64xf32>
+    %6083 = "ttir.relu"(%6081, %6082) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6084 = tensor.empty() : tensor<1x64xf32>
+    %6085 = "ttir.relu"(%6083, %6084) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6086 = tensor.empty() : tensor<1x64xf32>
+    %6087 = "ttir.relu"(%6085, %6086) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6088 = tensor.empty() : tensor<1x64xf32>
+    %6089 = "ttir.relu"(%6087, %6088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6090 = tensor.empty() : tensor<1x64xf32>
+    %6091 = "ttir.relu"(%6089, %6090) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6092 = tensor.empty() : tensor<1x64xf32>
+    %6093 = "ttir.relu"(%6091, %6092) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6094 = tensor.empty() : tensor<1x64xf32>
+    %6095 = "ttir.relu"(%6093, %6094) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6096 = tensor.empty() : tensor<1x64xf32>
+    %6097 = "ttir.relu"(%6095, %6096) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6098 = tensor.empty() : tensor<1x64xf32>
+    %6099 = "ttir.relu"(%6097, %6098) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6100 = tensor.empty() : tensor<1x64xf32>
+    %6101 = "ttir.relu"(%6099, %6100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6102 = tensor.empty() : tensor<1x64xf32>
+    %6103 = "ttir.relu"(%6101, %6102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6104 = tensor.empty() : tensor<1x64xf32>
+    %6105 = "ttir.relu"(%6103, %6104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6106 = tensor.empty() : tensor<1x64xf32>
+    %6107 = "ttir.relu"(%6105, %6106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6108 = tensor.empty() : tensor<1x64xf32>
+    %6109 = "ttir.relu"(%6107, %6108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6110 = tensor.empty() : tensor<1x64xf32>
+    %6111 = "ttir.relu"(%6109, %6110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6112 = tensor.empty() : tensor<1x64xf32>
+    %6113 = "ttir.relu"(%6111, %6112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6114 = tensor.empty() : tensor<1x64xf32>
+    %6115 = "ttir.relu"(%6113, %6114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6116 = tensor.empty() : tensor<1x64xf32>
+    %6117 = "ttir.relu"(%6115, %6116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6118 = tensor.empty() : tensor<1x64xf32>
+    %6119 = "ttir.relu"(%6117, %6118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6120 = tensor.empty() : tensor<1x64xf32>
+    %6121 = "ttir.relu"(%6119, %6120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6122 = tensor.empty() : tensor<1x64xf32>
+    %6123 = "ttir.relu"(%6121, %6122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6124 = tensor.empty() : tensor<1x64xf32>
+    %6125 = "ttir.relu"(%6123, %6124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6126 = tensor.empty() : tensor<1x64xf32>
+    %6127 = "ttir.relu"(%6125, %6126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6128 = tensor.empty() : tensor<1x64xf32>
+    %6129 = "ttir.relu"(%6127, %6128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6130 = tensor.empty() : tensor<1x64xf32>
+    %6131 = "ttir.relu"(%6129, %6130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6132 = tensor.empty() : tensor<1x64xf32>
+    %6133 = "ttir.relu"(%6131, %6132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6134 = tensor.empty() : tensor<1x64xf32>
+    %6135 = "ttir.relu"(%6133, %6134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6136 = tensor.empty() : tensor<1x64xf32>
+    %6137 = "ttir.relu"(%6135, %6136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6138 = tensor.empty() : tensor<1x64xf32>
+    %6139 = "ttir.relu"(%6137, %6138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6140 = tensor.empty() : tensor<1x64xf32>
+    %6141 = "ttir.relu"(%6139, %6140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6142 = tensor.empty() : tensor<1x64xf32>
+    %6143 = "ttir.relu"(%6141, %6142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6144 = tensor.empty() : tensor<1x64xf32>
+    %6145 = "ttir.relu"(%6143, %6144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6146 = tensor.empty() : tensor<1x64xf32>
+    %6147 = "ttir.relu"(%6145, %6146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6148 = tensor.empty() : tensor<1x64xf32>
+    %6149 = "ttir.relu"(%6147, %6148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6150 = tensor.empty() : tensor<1x64xf32>
+    %6151 = "ttir.relu"(%6149, %6150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6152 = tensor.empty() : tensor<1x64xf32>
+    %6153 = "ttir.relu"(%6151, %6152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6154 = tensor.empty() : tensor<1x64xf32>
+    %6155 = "ttir.relu"(%6153, %6154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6156 = tensor.empty() : tensor<1x64xf32>
+    %6157 = "ttir.relu"(%6155, %6156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6158 = tensor.empty() : tensor<1x64xf32>
+    %6159 = "ttir.relu"(%6157, %6158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6160 = tensor.empty() : tensor<1x64xf32>
+    %6161 = "ttir.relu"(%6159, %6160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6162 = tensor.empty() : tensor<1x64xf32>
+    %6163 = "ttir.relu"(%6161, %6162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6164 = tensor.empty() : tensor<1x64xf32>
+    %6165 = "ttir.relu"(%6163, %6164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6166 = tensor.empty() : tensor<1x64xf32>
+    %6167 = "ttir.relu"(%6165, %6166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6168 = tensor.empty() : tensor<1x64xf32>
+    %6169 = "ttir.relu"(%6167, %6168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6170 = tensor.empty() : tensor<1x64xf32>
+    %6171 = "ttir.relu"(%6169, %6170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6172 = tensor.empty() : tensor<1x64xf32>
+    %6173 = "ttir.relu"(%6171, %6172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6174 = tensor.empty() : tensor<1x64xf32>
+    %6175 = "ttir.relu"(%6173, %6174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6176 = tensor.empty() : tensor<1x64xf32>
+    %6177 = "ttir.relu"(%6175, %6176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6178 = tensor.empty() : tensor<1x64xf32>
+    %6179 = "ttir.relu"(%6177, %6178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6180 = tensor.empty() : tensor<1x64xf32>
+    %6181 = "ttir.relu"(%6179, %6180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6182 = tensor.empty() : tensor<1x64xf32>
+    %6183 = "ttir.relu"(%6181, %6182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6184 = tensor.empty() : tensor<1x64xf32>
+    %6185 = "ttir.relu"(%6183, %6184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6186 = tensor.empty() : tensor<1x64xf32>
+    %6187 = "ttir.relu"(%6185, %6186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6188 = tensor.empty() : tensor<1x64xf32>
+    %6189 = "ttir.relu"(%6187, %6188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6190 = tensor.empty() : tensor<1x64xf32>
+    %6191 = "ttir.relu"(%6189, %6190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6192 = tensor.empty() : tensor<1x64xf32>
+    %6193 = "ttir.relu"(%6191, %6192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6194 = tensor.empty() : tensor<1x64xf32>
+    %6195 = "ttir.relu"(%6193, %6194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6196 = tensor.empty() : tensor<1x64xf32>
+    %6197 = "ttir.relu"(%6195, %6196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6198 = tensor.empty() : tensor<1x64xf32>
+    %6199 = "ttir.relu"(%6197, %6198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6200 = tensor.empty() : tensor<1x64xf32>
+    %6201 = "ttir.relu"(%6199, %6200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6202 = tensor.empty() : tensor<1x64xf32>
+    %6203 = "ttir.relu"(%6201, %6202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6204 = tensor.empty() : tensor<1x64xf32>
+    %6205 = "ttir.relu"(%6203, %6204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6206 = tensor.empty() : tensor<1x64xf32>
+    %6207 = "ttir.relu"(%6205, %6206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6208 = tensor.empty() : tensor<1x64xf32>
+    %6209 = "ttir.relu"(%6207, %6208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6210 = tensor.empty() : tensor<1x64xf32>
+    %6211 = "ttir.relu"(%6209, %6210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6212 = tensor.empty() : tensor<1x64xf32>
+    %6213 = "ttir.relu"(%6211, %6212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6214 = tensor.empty() : tensor<1x64xf32>
+    %6215 = "ttir.relu"(%6213, %6214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6216 = tensor.empty() : tensor<1x64xf32>
+    %6217 = "ttir.relu"(%6215, %6216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6218 = tensor.empty() : tensor<1x64xf32>
+    %6219 = "ttir.relu"(%6217, %6218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6220 = tensor.empty() : tensor<1x64xf32>
+    %6221 = "ttir.relu"(%6219, %6220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6222 = tensor.empty() : tensor<1x64xf32>
+    %6223 = "ttir.relu"(%6221, %6222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6224 = tensor.empty() : tensor<1x64xf32>
+    %6225 = "ttir.relu"(%6223, %6224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6226 = tensor.empty() : tensor<1x64xf32>
+    %6227 = "ttir.relu"(%6225, %6226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6228 = tensor.empty() : tensor<1x64xf32>
+    %6229 = "ttir.relu"(%6227, %6228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6230 = tensor.empty() : tensor<1x64xf32>
+    %6231 = "ttir.relu"(%6229, %6230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6232 = tensor.empty() : tensor<1x64xf32>
+    %6233 = "ttir.relu"(%6231, %6232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6234 = tensor.empty() : tensor<1x64xf32>
+    %6235 = "ttir.relu"(%6233, %6234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6236 = tensor.empty() : tensor<1x64xf32>
+    %6237 = "ttir.relu"(%6235, %6236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6238 = tensor.empty() : tensor<1x64xf32>
+    %6239 = "ttir.relu"(%6237, %6238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6240 = tensor.empty() : tensor<1x64xf32>
+    %6241 = "ttir.relu"(%6239, %6240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6242 = tensor.empty() : tensor<1x64xf32>
+    %6243 = "ttir.relu"(%6241, %6242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6244 = tensor.empty() : tensor<1x64xf32>
+    %6245 = "ttir.relu"(%6243, %6244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6246 = tensor.empty() : tensor<1x64xf32>
+    %6247 = "ttir.relu"(%6245, %6246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6248 = tensor.empty() : tensor<1x64xf32>
+    %6249 = "ttir.relu"(%6247, %6248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6250 = tensor.empty() : tensor<1x64xf32>
+    %6251 = "ttir.relu"(%6249, %6250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6252 = tensor.empty() : tensor<1x64xf32>
+    %6253 = "ttir.relu"(%6251, %6252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6254 = tensor.empty() : tensor<1x64xf32>
+    %6255 = "ttir.relu"(%6253, %6254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6256 = tensor.empty() : tensor<1x64xf32>
+    %6257 = "ttir.relu"(%6255, %6256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6258 = tensor.empty() : tensor<1x64xf32>
+    %6259 = "ttir.relu"(%6257, %6258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6260 = tensor.empty() : tensor<1x64xf32>
+    %6261 = "ttir.relu"(%6259, %6260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6262 = tensor.empty() : tensor<1x64xf32>
+    %6263 = "ttir.relu"(%6261, %6262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6264 = tensor.empty() : tensor<1x64xf32>
+    %6265 = "ttir.relu"(%6263, %6264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6266 = tensor.empty() : tensor<1x64xf32>
+    %6267 = "ttir.relu"(%6265, %6266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6268 = tensor.empty() : tensor<1x64xf32>
+    %6269 = "ttir.relu"(%6267, %6268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6270 = tensor.empty() : tensor<1x64xf32>
+    %6271 = "ttir.relu"(%6269, %6270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6272 = tensor.empty() : tensor<1x64xf32>
+    %6273 = "ttir.relu"(%6271, %6272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6274 = tensor.empty() : tensor<1x64xf32>
+    %6275 = "ttir.relu"(%6273, %6274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6276 = tensor.empty() : tensor<1x64xf32>
+    %6277 = "ttir.relu"(%6275, %6276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6278 = tensor.empty() : tensor<1x64xf32>
+    %6279 = "ttir.relu"(%6277, %6278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6280 = tensor.empty() : tensor<1x64xf32>
+    %6281 = "ttir.relu"(%6279, %6280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6282 = tensor.empty() : tensor<1x64xf32>
+    %6283 = "ttir.relu"(%6281, %6282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6284 = tensor.empty() : tensor<1x64xf32>
+    %6285 = "ttir.relu"(%6283, %6284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6286 = tensor.empty() : tensor<1x64xf32>
+    %6287 = "ttir.relu"(%6285, %6286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6288 = tensor.empty() : tensor<1x64xf32>
+    %6289 = "ttir.relu"(%6287, %6288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6290 = tensor.empty() : tensor<1x64xf32>
+    %6291 = "ttir.relu"(%6289, %6290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6292 = tensor.empty() : tensor<1x64xf32>
+    %6293 = "ttir.relu"(%6291, %6292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6294 = tensor.empty() : tensor<1x64xf32>
+    %6295 = "ttir.relu"(%6293, %6294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6296 = tensor.empty() : tensor<1x64xf32>
+    %6297 = "ttir.relu"(%6295, %6296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6298 = tensor.empty() : tensor<1x64xf32>
+    %6299 = "ttir.relu"(%6297, %6298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6300 = tensor.empty() : tensor<1x64xf32>
+    %6301 = "ttir.relu"(%6299, %6300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6302 = tensor.empty() : tensor<1x64xf32>
+    %6303 = "ttir.relu"(%6301, %6302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6304 = tensor.empty() : tensor<1x64xf32>
+    %6305 = "ttir.relu"(%6303, %6304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6306 = tensor.empty() : tensor<1x64xf32>
+    %6307 = "ttir.relu"(%6305, %6306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6308 = tensor.empty() : tensor<1x64xf32>
+    %6309 = "ttir.relu"(%6307, %6308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6310 = tensor.empty() : tensor<1x64xf32>
+    %6311 = "ttir.relu"(%6309, %6310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6312 = tensor.empty() : tensor<1x64xf32>
+    %6313 = "ttir.relu"(%6311, %6312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6314 = tensor.empty() : tensor<1x64xf32>
+    %6315 = "ttir.relu"(%6313, %6314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6316 = tensor.empty() : tensor<1x64xf32>
+    %6317 = "ttir.relu"(%6315, %6316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6318 = tensor.empty() : tensor<1x64xf32>
+    %6319 = "ttir.relu"(%6317, %6318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6320 = tensor.empty() : tensor<1x64xf32>
+    %6321 = "ttir.relu"(%6319, %6320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6322 = tensor.empty() : tensor<1x64xf32>
+    %6323 = "ttir.relu"(%6321, %6322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6324 = tensor.empty() : tensor<1x64xf32>
+    %6325 = "ttir.relu"(%6323, %6324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6326 = tensor.empty() : tensor<1x64xf32>
+    %6327 = "ttir.relu"(%6325, %6326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6328 = tensor.empty() : tensor<1x64xf32>
+    %6329 = "ttir.relu"(%6327, %6328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6330 = tensor.empty() : tensor<1x64xf32>
+    %6331 = "ttir.relu"(%6329, %6330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6332 = tensor.empty() : tensor<1x64xf32>
+    %6333 = "ttir.relu"(%6331, %6332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6334 = tensor.empty() : tensor<1x64xf32>
+    %6335 = "ttir.relu"(%6333, %6334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6336 = tensor.empty() : tensor<1x64xf32>
+    %6337 = "ttir.relu"(%6335, %6336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6338 = tensor.empty() : tensor<1x64xf32>
+    %6339 = "ttir.relu"(%6337, %6338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6340 = tensor.empty() : tensor<1x64xf32>
+    %6341 = "ttir.relu"(%6339, %6340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6342 = tensor.empty() : tensor<1x64xf32>
+    %6343 = "ttir.relu"(%6341, %6342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6344 = tensor.empty() : tensor<1x64xf32>
+    %6345 = "ttir.relu"(%6343, %6344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6346 = tensor.empty() : tensor<1x64xf32>
+    %6347 = "ttir.relu"(%6345, %6346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6348 = tensor.empty() : tensor<1x64xf32>
+    %6349 = "ttir.relu"(%6347, %6348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6350 = tensor.empty() : tensor<1x64xf32>
+    %6351 = "ttir.relu"(%6349, %6350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6352 = tensor.empty() : tensor<1x64xf32>
+    %6353 = "ttir.relu"(%6351, %6352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6354 = tensor.empty() : tensor<1x64xf32>
+    %6355 = "ttir.relu"(%6353, %6354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6356 = tensor.empty() : tensor<1x64xf32>
+    %6357 = "ttir.relu"(%6355, %6356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6358 = tensor.empty() : tensor<1x64xf32>
+    %6359 = "ttir.relu"(%6357, %6358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6360 = tensor.empty() : tensor<1x64xf32>
+    %6361 = "ttir.relu"(%6359, %6360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6362 = tensor.empty() : tensor<1x64xf32>
+    %6363 = "ttir.relu"(%6361, %6362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6364 = tensor.empty() : tensor<1x64xf32>
+    %6365 = "ttir.relu"(%6363, %6364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6366 = tensor.empty() : tensor<1x64xf32>
+    %6367 = "ttir.relu"(%6365, %6366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6368 = tensor.empty() : tensor<1x64xf32>
+    %6369 = "ttir.relu"(%6367, %6368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6370 = tensor.empty() : tensor<1x64xf32>
+    %6371 = "ttir.relu"(%6369, %6370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6372 = tensor.empty() : tensor<1x64xf32>
+    %6373 = "ttir.relu"(%6371, %6372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6374 = tensor.empty() : tensor<1x64xf32>
+    %6375 = "ttir.relu"(%6373, %6374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6376 = tensor.empty() : tensor<1x64xf32>
+    %6377 = "ttir.relu"(%6375, %6376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6378 = tensor.empty() : tensor<1x64xf32>
+    %6379 = "ttir.relu"(%6377, %6378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6380 = tensor.empty() : tensor<1x64xf32>
+    %6381 = "ttir.relu"(%6379, %6380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6382 = tensor.empty() : tensor<1x64xf32>
+    %6383 = "ttir.relu"(%6381, %6382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6384 = tensor.empty() : tensor<1x64xf32>
+    %6385 = "ttir.relu"(%6383, %6384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6386 = tensor.empty() : tensor<1x64xf32>
+    %6387 = "ttir.relu"(%6385, %6386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6388 = tensor.empty() : tensor<1x64xf32>
+    %6389 = "ttir.relu"(%6387, %6388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6390 = tensor.empty() : tensor<1x64xf32>
+    %6391 = "ttir.relu"(%6389, %6390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6392 = tensor.empty() : tensor<1x64xf32>
+    %6393 = "ttir.relu"(%6391, %6392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6394 = tensor.empty() : tensor<1x64xf32>
+    %6395 = "ttir.relu"(%6393, %6394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6396 = tensor.empty() : tensor<1x64xf32>
+    %6397 = "ttir.relu"(%6395, %6396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6398 = tensor.empty() : tensor<1x64xf32>
+    %6399 = "ttir.relu"(%6397, %6398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6400 = tensor.empty() : tensor<1x64xf32>
+    %6401 = "ttir.relu"(%6399, %6400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6402 = tensor.empty() : tensor<1x64xf32>
+    %6403 = "ttir.relu"(%6401, %6402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6404 = tensor.empty() : tensor<1x64xf32>
+    %6405 = "ttir.relu"(%6403, %6404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6406 = tensor.empty() : tensor<1x64xf32>
+    %6407 = "ttir.relu"(%6405, %6406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6408 = tensor.empty() : tensor<1x64xf32>
+    %6409 = "ttir.relu"(%6407, %6408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6410 = tensor.empty() : tensor<1x64xf32>
+    %6411 = "ttir.relu"(%6409, %6410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6412 = tensor.empty() : tensor<1x64xf32>
+    %6413 = "ttir.relu"(%6411, %6412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6414 = tensor.empty() : tensor<1x64xf32>
+    %6415 = "ttir.relu"(%6413, %6414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6416 = tensor.empty() : tensor<1x64xf32>
+    %6417 = "ttir.relu"(%6415, %6416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6418 = tensor.empty() : tensor<1x64xf32>
+    %6419 = "ttir.relu"(%6417, %6418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6420 = tensor.empty() : tensor<1x64xf32>
+    %6421 = "ttir.relu"(%6419, %6420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6422 = tensor.empty() : tensor<1x64xf32>
+    %6423 = "ttir.relu"(%6421, %6422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6424 = tensor.empty() : tensor<1x64xf32>
+    %6425 = "ttir.relu"(%6423, %6424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6426 = tensor.empty() : tensor<1x64xf32>
+    %6427 = "ttir.relu"(%6425, %6426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6428 = tensor.empty() : tensor<1x64xf32>
+    %6429 = "ttir.relu"(%6427, %6428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6430 = tensor.empty() : tensor<1x64xf32>
+    %6431 = "ttir.relu"(%6429, %6430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6432 = tensor.empty() : tensor<1x64xf32>
+    %6433 = "ttir.relu"(%6431, %6432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6434 = tensor.empty() : tensor<1x64xf32>
+    %6435 = "ttir.relu"(%6433, %6434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6436 = tensor.empty() : tensor<1x64xf32>
+    %6437 = "ttir.relu"(%6435, %6436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6438 = tensor.empty() : tensor<1x64xf32>
+    %6439 = "ttir.relu"(%6437, %6438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6440 = tensor.empty() : tensor<1x64xf32>
+    %6441 = "ttir.relu"(%6439, %6440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6442 = tensor.empty() : tensor<1x64xf32>
+    %6443 = "ttir.relu"(%6441, %6442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6444 = tensor.empty() : tensor<1x64xf32>
+    %6445 = "ttir.relu"(%6443, %6444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6446 = tensor.empty() : tensor<1x64xf32>
+    %6447 = "ttir.relu"(%6445, %6446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6448 = tensor.empty() : tensor<1x64xf32>
+    %6449 = "ttir.relu"(%6447, %6448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6450 = tensor.empty() : tensor<1x64xf32>
+    %6451 = "ttir.relu"(%6449, %6450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6452 = tensor.empty() : tensor<1x64xf32>
+    %6453 = "ttir.relu"(%6451, %6452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6454 = tensor.empty() : tensor<1x64xf32>
+    %6455 = "ttir.relu"(%6453, %6454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6456 = tensor.empty() : tensor<1x64xf32>
+    %6457 = "ttir.relu"(%6455, %6456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6458 = tensor.empty() : tensor<1x64xf32>
+    %6459 = "ttir.relu"(%6457, %6458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6460 = tensor.empty() : tensor<1x64xf32>
+    %6461 = "ttir.relu"(%6459, %6460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6462 = tensor.empty() : tensor<1x64xf32>
+    %6463 = "ttir.relu"(%6461, %6462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6464 = tensor.empty() : tensor<1x64xf32>
+    %6465 = "ttir.relu"(%6463, %6464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6466 = tensor.empty() : tensor<1x64xf32>
+    %6467 = "ttir.relu"(%6465, %6466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6468 = tensor.empty() : tensor<1x64xf32>
+    %6469 = "ttir.relu"(%6467, %6468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6470 = tensor.empty() : tensor<1x64xf32>
+    %6471 = "ttir.relu"(%6469, %6470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6472 = tensor.empty() : tensor<1x64xf32>
+    %6473 = "ttir.relu"(%6471, %6472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6474 = tensor.empty() : tensor<1x64xf32>
+    %6475 = "ttir.relu"(%6473, %6474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6476 = tensor.empty() : tensor<1x64xf32>
+    %6477 = "ttir.relu"(%6475, %6476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6478 = tensor.empty() : tensor<1x64xf32>
+    %6479 = "ttir.relu"(%6477, %6478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6480 = tensor.empty() : tensor<1x64xf32>
+    %6481 = "ttir.relu"(%6479, %6480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6482 = tensor.empty() : tensor<1x64xf32>
+    %6483 = "ttir.relu"(%6481, %6482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6484 = tensor.empty() : tensor<1x64xf32>
+    %6485 = "ttir.relu"(%6483, %6484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6486 = tensor.empty() : tensor<1x64xf32>
+    %6487 = "ttir.relu"(%6485, %6486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6488 = tensor.empty() : tensor<1x64xf32>
+    %6489 = "ttir.relu"(%6487, %6488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6490 = tensor.empty() : tensor<1x64xf32>
+    %6491 = "ttir.relu"(%6489, %6490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6492 = tensor.empty() : tensor<1x64xf32>
+    %6493 = "ttir.relu"(%6491, %6492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6494 = tensor.empty() : tensor<1x64xf32>
+    %6495 = "ttir.relu"(%6493, %6494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6496 = tensor.empty() : tensor<1x64xf32>
+    %6497 = "ttir.relu"(%6495, %6496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6498 = tensor.empty() : tensor<1x64xf32>
+    %6499 = "ttir.relu"(%6497, %6498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6500 = tensor.empty() : tensor<1x64xf32>
+    %6501 = "ttir.relu"(%6499, %6500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6502 = tensor.empty() : tensor<1x64xf32>
+    %6503 = "ttir.relu"(%6501, %6502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6504 = tensor.empty() : tensor<1x64xf32>
+    %6505 = "ttir.relu"(%6503, %6504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6506 = tensor.empty() : tensor<1x64xf32>
+    %6507 = "ttir.relu"(%6505, %6506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6508 = tensor.empty() : tensor<1x64xf32>
+    %6509 = "ttir.relu"(%6507, %6508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6510 = tensor.empty() : tensor<1x64xf32>
+    %6511 = "ttir.relu"(%6509, %6510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6512 = tensor.empty() : tensor<1x64xf32>
+    %6513 = "ttir.relu"(%6511, %6512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6514 = tensor.empty() : tensor<1x64xf32>
+    %6515 = "ttir.relu"(%6513, %6514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6516 = tensor.empty() : tensor<1x64xf32>
+    %6517 = "ttir.relu"(%6515, %6516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6518 = tensor.empty() : tensor<1x64xf32>
+    %6519 = "ttir.relu"(%6517, %6518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6520 = tensor.empty() : tensor<1x64xf32>
+    %6521 = "ttir.relu"(%6519, %6520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6522 = tensor.empty() : tensor<1x64xf32>
+    %6523 = "ttir.relu"(%6521, %6522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6524 = tensor.empty() : tensor<1x64xf32>
+    %6525 = "ttir.relu"(%6523, %6524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6526 = tensor.empty() : tensor<1x64xf32>
+    %6527 = "ttir.relu"(%6525, %6526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6528 = tensor.empty() : tensor<1x64xf32>
+    %6529 = "ttir.relu"(%6527, %6528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6530 = tensor.empty() : tensor<1x64xf32>
+    %6531 = "ttir.relu"(%6529, %6530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6532 = tensor.empty() : tensor<1x64xf32>
+    %6533 = "ttir.relu"(%6531, %6532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6534 = tensor.empty() : tensor<1x64xf32>
+    %6535 = "ttir.relu"(%6533, %6534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6536 = tensor.empty() : tensor<1x64xf32>
+    %6537 = "ttir.relu"(%6535, %6536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6538 = tensor.empty() : tensor<1x64xf32>
+    %6539 = "ttir.relu"(%6537, %6538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6540 = tensor.empty() : tensor<1x64xf32>
+    %6541 = "ttir.relu"(%6539, %6540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6542 = tensor.empty() : tensor<1x64xf32>
+    %6543 = "ttir.relu"(%6541, %6542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6544 = tensor.empty() : tensor<1x64xf32>
+    %6545 = "ttir.relu"(%6543, %6544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6546 = tensor.empty() : tensor<1x64xf32>
+    %6547 = "ttir.relu"(%6545, %6546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6548 = tensor.empty() : tensor<1x64xf32>
+    %6549 = "ttir.relu"(%6547, %6548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6550 = tensor.empty() : tensor<1x64xf32>
+    %6551 = "ttir.relu"(%6549, %6550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6552 = tensor.empty() : tensor<1x64xf32>
+    %6553 = "ttir.relu"(%6551, %6552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6554 = tensor.empty() : tensor<1x64xf32>
+    %6555 = "ttir.relu"(%6553, %6554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6556 = tensor.empty() : tensor<1x64xf32>
+    %6557 = "ttir.relu"(%6555, %6556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6558 = tensor.empty() : tensor<1x64xf32>
+    %6559 = "ttir.relu"(%6557, %6558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6560 = tensor.empty() : tensor<1x64xf32>
+    %6561 = "ttir.relu"(%6559, %6560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6562 = tensor.empty() : tensor<1x64xf32>
+    %6563 = "ttir.relu"(%6561, %6562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6564 = tensor.empty() : tensor<1x64xf32>
+    %6565 = "ttir.relu"(%6563, %6564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6566 = tensor.empty() : tensor<1x64xf32>
+    %6567 = "ttir.relu"(%6565, %6566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6568 = tensor.empty() : tensor<1x64xf32>
+    %6569 = "ttir.relu"(%6567, %6568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6570 = tensor.empty() : tensor<1x64xf32>
+    %6571 = "ttir.relu"(%6569, %6570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6572 = tensor.empty() : tensor<1x64xf32>
+    %6573 = "ttir.relu"(%6571, %6572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6574 = tensor.empty() : tensor<1x64xf32>
+    %6575 = "ttir.relu"(%6573, %6574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6576 = tensor.empty() : tensor<1x64xf32>
+    %6577 = "ttir.relu"(%6575, %6576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6578 = tensor.empty() : tensor<1x64xf32>
+    %6579 = "ttir.relu"(%6577, %6578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6580 = tensor.empty() : tensor<1x64xf32>
+    %6581 = "ttir.relu"(%6579, %6580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6582 = tensor.empty() : tensor<1x64xf32>
+    %6583 = "ttir.relu"(%6581, %6582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6584 = tensor.empty() : tensor<1x64xf32>
+    %6585 = "ttir.relu"(%6583, %6584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6586 = tensor.empty() : tensor<1x64xf32>
+    %6587 = "ttir.relu"(%6585, %6586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6588 = tensor.empty() : tensor<1x64xf32>
+    %6589 = "ttir.relu"(%6587, %6588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6590 = tensor.empty() : tensor<1x64xf32>
+    %6591 = "ttir.relu"(%6589, %6590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6592 = tensor.empty() : tensor<1x64xf32>
+    %6593 = "ttir.relu"(%6591, %6592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6594 = tensor.empty() : tensor<1x64xf32>
+    %6595 = "ttir.relu"(%6593, %6594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6596 = tensor.empty() : tensor<1x64xf32>
+    %6597 = "ttir.relu"(%6595, %6596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6598 = tensor.empty() : tensor<1x64xf32>
+    %6599 = "ttir.relu"(%6597, %6598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6600 = tensor.empty() : tensor<1x64xf32>
+    %6601 = "ttir.relu"(%6599, %6600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6602 = tensor.empty() : tensor<1x64xf32>
+    %6603 = "ttir.relu"(%6601, %6602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6604 = tensor.empty() : tensor<1x64xf32>
+    %6605 = "ttir.relu"(%6603, %6604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6606 = tensor.empty() : tensor<1x64xf32>
+    %6607 = "ttir.relu"(%6605, %6606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6608 = tensor.empty() : tensor<1x64xf32>
+    %6609 = "ttir.relu"(%6607, %6608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6610 = tensor.empty() : tensor<1x64xf32>
+    %6611 = "ttir.relu"(%6609, %6610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6612 = tensor.empty() : tensor<1x64xf32>
+    %6613 = "ttir.relu"(%6611, %6612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6614 = tensor.empty() : tensor<1x64xf32>
+    %6615 = "ttir.relu"(%6613, %6614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6616 = tensor.empty() : tensor<1x64xf32>
+    %6617 = "ttir.relu"(%6615, %6616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6618 = tensor.empty() : tensor<1x64xf32>
+    %6619 = "ttir.relu"(%6617, %6618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6620 = tensor.empty() : tensor<1x64xf32>
+    %6621 = "ttir.relu"(%6619, %6620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6622 = tensor.empty() : tensor<1x64xf32>
+    %6623 = "ttir.relu"(%6621, %6622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6624 = tensor.empty() : tensor<1x64xf32>
+    %6625 = "ttir.relu"(%6623, %6624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6626 = tensor.empty() : tensor<1x64xf32>
+    %6627 = "ttir.relu"(%6625, %6626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6628 = tensor.empty() : tensor<1x64xf32>
+    %6629 = "ttir.relu"(%6627, %6628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6630 = tensor.empty() : tensor<1x64xf32>
+    %6631 = "ttir.relu"(%6629, %6630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6632 = tensor.empty() : tensor<1x64xf32>
+    %6633 = "ttir.relu"(%6631, %6632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6634 = tensor.empty() : tensor<1x64xf32>
+    %6635 = "ttir.relu"(%6633, %6634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6636 = tensor.empty() : tensor<1x64xf32>
+    %6637 = "ttir.relu"(%6635, %6636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6638 = tensor.empty() : tensor<1x64xf32>
+    %6639 = "ttir.relu"(%6637, %6638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6640 = tensor.empty() : tensor<1x64xf32>
+    %6641 = "ttir.relu"(%6639, %6640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6642 = tensor.empty() : tensor<1x64xf32>
+    %6643 = "ttir.relu"(%6641, %6642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6644 = tensor.empty() : tensor<1x64xf32>
+    %6645 = "ttir.relu"(%6643, %6644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6646 = tensor.empty() : tensor<1x64xf32>
+    %6647 = "ttir.relu"(%6645, %6646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6648 = tensor.empty() : tensor<1x64xf32>
+    %6649 = "ttir.relu"(%6647, %6648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6650 = tensor.empty() : tensor<1x64xf32>
+    %6651 = "ttir.relu"(%6649, %6650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6652 = tensor.empty() : tensor<1x64xf32>
+    %6653 = "ttir.relu"(%6651, %6652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6654 = tensor.empty() : tensor<1x64xf32>
+    %6655 = "ttir.relu"(%6653, %6654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6656 = tensor.empty() : tensor<1x64xf32>
+    %6657 = "ttir.relu"(%6655, %6656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6658 = tensor.empty() : tensor<1x64xf32>
+    %6659 = "ttir.relu"(%6657, %6658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6660 = tensor.empty() : tensor<1x64xf32>
+    %6661 = "ttir.relu"(%6659, %6660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6662 = tensor.empty() : tensor<1x64xf32>
+    %6663 = "ttir.relu"(%6661, %6662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6664 = tensor.empty() : tensor<1x64xf32>
+    %6665 = "ttir.relu"(%6663, %6664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6666 = tensor.empty() : tensor<1x64xf32>
+    %6667 = "ttir.relu"(%6665, %6666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6668 = tensor.empty() : tensor<1x64xf32>
+    %6669 = "ttir.relu"(%6667, %6668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6670 = tensor.empty() : tensor<1x64xf32>
+    %6671 = "ttir.relu"(%6669, %6670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6672 = tensor.empty() : tensor<1x64xf32>
+    %6673 = "ttir.relu"(%6671, %6672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6674 = tensor.empty() : tensor<1x64xf32>
+    %6675 = "ttir.relu"(%6673, %6674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6676 = tensor.empty() : tensor<1x64xf32>
+    %6677 = "ttir.relu"(%6675, %6676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6678 = tensor.empty() : tensor<1x64xf32>
+    %6679 = "ttir.relu"(%6677, %6678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6680 = tensor.empty() : tensor<1x64xf32>
+    %6681 = "ttir.relu"(%6679, %6680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6682 = tensor.empty() : tensor<1x64xf32>
+    %6683 = "ttir.relu"(%6681, %6682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6684 = tensor.empty() : tensor<1x64xf32>
+    %6685 = "ttir.relu"(%6683, %6684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6686 = tensor.empty() : tensor<1x64xf32>
+    %6687 = "ttir.relu"(%6685, %6686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6688 = tensor.empty() : tensor<1x64xf32>
+    %6689 = "ttir.relu"(%6687, %6688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6690 = tensor.empty() : tensor<1x64xf32>
+    %6691 = "ttir.relu"(%6689, %6690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6692 = tensor.empty() : tensor<1x64xf32>
+    %6693 = "ttir.relu"(%6691, %6692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6694 = tensor.empty() : tensor<1x64xf32>
+    %6695 = "ttir.relu"(%6693, %6694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6696 = tensor.empty() : tensor<1x64xf32>
+    %6697 = "ttir.relu"(%6695, %6696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6698 = tensor.empty() : tensor<1x64xf32>
+    %6699 = "ttir.relu"(%6697, %6698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6700 = tensor.empty() : tensor<1x64xf32>
+    %6701 = "ttir.relu"(%6699, %6700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6702 = tensor.empty() : tensor<1x64xf32>
+    %6703 = "ttir.relu"(%6701, %6702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6704 = tensor.empty() : tensor<1x64xf32>
+    %6705 = "ttir.relu"(%6703, %6704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6706 = tensor.empty() : tensor<1x64xf32>
+    %6707 = "ttir.relu"(%6705, %6706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6708 = tensor.empty() : tensor<1x64xf32>
+    %6709 = "ttir.relu"(%6707, %6708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6710 = tensor.empty() : tensor<1x64xf32>
+    %6711 = "ttir.relu"(%6709, %6710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6712 = tensor.empty() : tensor<1x64xf32>
+    %6713 = "ttir.relu"(%6711, %6712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6714 = tensor.empty() : tensor<1x64xf32>
+    %6715 = "ttir.relu"(%6713, %6714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6716 = tensor.empty() : tensor<1x64xf32>
+    %6717 = "ttir.relu"(%6715, %6716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6718 = tensor.empty() : tensor<1x64xf32>
+    %6719 = "ttir.relu"(%6717, %6718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6720 = tensor.empty() : tensor<1x64xf32>
+    %6721 = "ttir.relu"(%6719, %6720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6722 = tensor.empty() : tensor<1x64xf32>
+    %6723 = "ttir.relu"(%6721, %6722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6724 = tensor.empty() : tensor<1x64xf32>
+    %6725 = "ttir.relu"(%6723, %6724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6726 = tensor.empty() : tensor<1x64xf32>
+    %6727 = "ttir.relu"(%6725, %6726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6728 = tensor.empty() : tensor<1x64xf32>
+    %6729 = "ttir.relu"(%6727, %6728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6730 = tensor.empty() : tensor<1x64xf32>
+    %6731 = "ttir.relu"(%6729, %6730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6732 = tensor.empty() : tensor<1x64xf32>
+    %6733 = "ttir.relu"(%6731, %6732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6734 = tensor.empty() : tensor<1x64xf32>
+    %6735 = "ttir.relu"(%6733, %6734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6736 = tensor.empty() : tensor<1x64xf32>
+    %6737 = "ttir.relu"(%6735, %6736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6738 = tensor.empty() : tensor<1x64xf32>
+    %6739 = "ttir.relu"(%6737, %6738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6740 = tensor.empty() : tensor<1x64xf32>
+    %6741 = "ttir.relu"(%6739, %6740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6742 = tensor.empty() : tensor<1x64xf32>
+    %6743 = "ttir.relu"(%6741, %6742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6744 = tensor.empty() : tensor<1x64xf32>
+    %6745 = "ttir.relu"(%6743, %6744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6746 = tensor.empty() : tensor<1x64xf32>
+    %6747 = "ttir.relu"(%6745, %6746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6748 = tensor.empty() : tensor<1x64xf32>
+    %6749 = "ttir.relu"(%6747, %6748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6750 = tensor.empty() : tensor<1x64xf32>
+    %6751 = "ttir.relu"(%6749, %6750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6752 = tensor.empty() : tensor<1x64xf32>
+    %6753 = "ttir.relu"(%6751, %6752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6754 = tensor.empty() : tensor<1x64xf32>
+    %6755 = "ttir.relu"(%6753, %6754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6756 = tensor.empty() : tensor<1x64xf32>
+    %6757 = "ttir.relu"(%6755, %6756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6758 = tensor.empty() : tensor<1x64xf32>
+    %6759 = "ttir.relu"(%6757, %6758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6760 = tensor.empty() : tensor<1x64xf32>
+    %6761 = "ttir.relu"(%6759, %6760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6762 = tensor.empty() : tensor<1x64xf32>
+    %6763 = "ttir.relu"(%6761, %6762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6764 = tensor.empty() : tensor<1x64xf32>
+    %6765 = "ttir.relu"(%6763, %6764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6766 = tensor.empty() : tensor<1x64xf32>
+    %6767 = "ttir.relu"(%6765, %6766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6768 = tensor.empty() : tensor<1x64xf32>
+    %6769 = "ttir.relu"(%6767, %6768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6770 = tensor.empty() : tensor<1x64xf32>
+    %6771 = "ttir.relu"(%6769, %6770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6772 = tensor.empty() : tensor<1x64xf32>
+    %6773 = "ttir.relu"(%6771, %6772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6774 = tensor.empty() : tensor<1x64xf32>
+    %6775 = "ttir.relu"(%6773, %6774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6776 = tensor.empty() : tensor<1x64xf32>
+    %6777 = "ttir.relu"(%6775, %6776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6778 = tensor.empty() : tensor<1x64xf32>
+    %6779 = "ttir.relu"(%6777, %6778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6780 = tensor.empty() : tensor<1x64xf32>
+    %6781 = "ttir.relu"(%6779, %6780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6782 = tensor.empty() : tensor<1x64xf32>
+    %6783 = "ttir.relu"(%6781, %6782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6784 = tensor.empty() : tensor<1x64xf32>
+    %6785 = "ttir.relu"(%6783, %6784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6786 = tensor.empty() : tensor<1x64xf32>
+    %6787 = "ttir.relu"(%6785, %6786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6788 = tensor.empty() : tensor<1x64xf32>
+    %6789 = "ttir.relu"(%6787, %6788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6790 = tensor.empty() : tensor<1x64xf32>
+    %6791 = "ttir.relu"(%6789, %6790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6792 = tensor.empty() : tensor<1x64xf32>
+    %6793 = "ttir.relu"(%6791, %6792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6794 = tensor.empty() : tensor<1x64xf32>
+    %6795 = "ttir.relu"(%6793, %6794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6796 = tensor.empty() : tensor<1x64xf32>
+    %6797 = "ttir.relu"(%6795, %6796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6798 = tensor.empty() : tensor<1x64xf32>
+    %6799 = "ttir.relu"(%6797, %6798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6800 = tensor.empty() : tensor<1x64xf32>
+    %6801 = "ttir.relu"(%6799, %6800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6802 = tensor.empty() : tensor<1x64xf32>
+    %6803 = "ttir.relu"(%6801, %6802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6804 = tensor.empty() : tensor<1x64xf32>
+    %6805 = "ttir.relu"(%6803, %6804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6806 = tensor.empty() : tensor<1x64xf32>
+    %6807 = "ttir.relu"(%6805, %6806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6808 = tensor.empty() : tensor<1x64xf32>
+    %6809 = "ttir.relu"(%6807, %6808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6810 = tensor.empty() : tensor<1x64xf32>
+    %6811 = "ttir.relu"(%6809, %6810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6812 = tensor.empty() : tensor<1x64xf32>
+    %6813 = "ttir.relu"(%6811, %6812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6814 = tensor.empty() : tensor<1x64xf32>
+    %6815 = "ttir.relu"(%6813, %6814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6816 = tensor.empty() : tensor<1x64xf32>
+    %6817 = "ttir.relu"(%6815, %6816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6818 = tensor.empty() : tensor<1x64xf32>
+    %6819 = "ttir.relu"(%6817, %6818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6820 = tensor.empty() : tensor<1x64xf32>
+    %6821 = "ttir.relu"(%6819, %6820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6822 = tensor.empty() : tensor<1x64xf32>
+    %6823 = "ttir.relu"(%6821, %6822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6824 = tensor.empty() : tensor<1x64xf32>
+    %6825 = "ttir.relu"(%6823, %6824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6826 = tensor.empty() : tensor<1x64xf32>
+    %6827 = "ttir.relu"(%6825, %6826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6828 = tensor.empty() : tensor<1x64xf32>
+    %6829 = "ttir.relu"(%6827, %6828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6830 = tensor.empty() : tensor<1x64xf32>
+    %6831 = "ttir.relu"(%6829, %6830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6832 = tensor.empty() : tensor<1x64xf32>
+    %6833 = "ttir.relu"(%6831, %6832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6834 = tensor.empty() : tensor<1x64xf32>
+    %6835 = "ttir.relu"(%6833, %6834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6836 = tensor.empty() : tensor<1x64xf32>
+    %6837 = "ttir.relu"(%6835, %6836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6838 = tensor.empty() : tensor<1x64xf32>
+    %6839 = "ttir.relu"(%6837, %6838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6840 = tensor.empty() : tensor<1x64xf32>
+    %6841 = "ttir.relu"(%6839, %6840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6842 = tensor.empty() : tensor<1x64xf32>
+    %6843 = "ttir.relu"(%6841, %6842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6844 = tensor.empty() : tensor<1x64xf32>
+    %6845 = "ttir.relu"(%6843, %6844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6846 = tensor.empty() : tensor<1x64xf32>
+    %6847 = "ttir.relu"(%6845, %6846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6848 = tensor.empty() : tensor<1x64xf32>
+    %6849 = "ttir.relu"(%6847, %6848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6850 = tensor.empty() : tensor<1x64xf32>
+    %6851 = "ttir.relu"(%6849, %6850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6852 = tensor.empty() : tensor<1x64xf32>
+    %6853 = "ttir.relu"(%6851, %6852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6854 = tensor.empty() : tensor<1x64xf32>
+    %6855 = "ttir.relu"(%6853, %6854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6856 = tensor.empty() : tensor<1x64xf32>
+    %6857 = "ttir.relu"(%6855, %6856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6858 = tensor.empty() : tensor<1x64xf32>
+    %6859 = "ttir.relu"(%6857, %6858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6860 = tensor.empty() : tensor<1x64xf32>
+    %6861 = "ttir.relu"(%6859, %6860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6862 = tensor.empty() : tensor<1x64xf32>
+    %6863 = "ttir.relu"(%6861, %6862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6864 = tensor.empty() : tensor<1x64xf32>
+    %6865 = "ttir.relu"(%6863, %6864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6866 = tensor.empty() : tensor<1x64xf32>
+    %6867 = "ttir.relu"(%6865, %6866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6868 = tensor.empty() : tensor<1x64xf32>
+    %6869 = "ttir.relu"(%6867, %6868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6870 = tensor.empty() : tensor<1x64xf32>
+    %6871 = "ttir.relu"(%6869, %6870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6872 = tensor.empty() : tensor<1x64xf32>
+    %6873 = "ttir.relu"(%6871, %6872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6874 = tensor.empty() : tensor<1x64xf32>
+    %6875 = "ttir.relu"(%6873, %6874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6876 = tensor.empty() : tensor<1x64xf32>
+    %6877 = "ttir.relu"(%6875, %6876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6878 = tensor.empty() : tensor<1x64xf32>
+    %6879 = "ttir.relu"(%6877, %6878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6880 = tensor.empty() : tensor<1x64xf32>
+    %6881 = "ttir.relu"(%6879, %6880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6882 = tensor.empty() : tensor<1x64xf32>
+    %6883 = "ttir.relu"(%6881, %6882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6884 = tensor.empty() : tensor<1x64xf32>
+    %6885 = "ttir.relu"(%6883, %6884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6886 = tensor.empty() : tensor<1x64xf32>
+    %6887 = "ttir.relu"(%6885, %6886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6888 = tensor.empty() : tensor<1x64xf32>
+    %6889 = "ttir.relu"(%6887, %6888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6890 = tensor.empty() : tensor<1x64xf32>
+    %6891 = "ttir.relu"(%6889, %6890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6892 = tensor.empty() : tensor<1x64xf32>
+    %6893 = "ttir.relu"(%6891, %6892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6894 = tensor.empty() : tensor<1x64xf32>
+    %6895 = "ttir.relu"(%6893, %6894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6896 = tensor.empty() : tensor<1x64xf32>
+    %6897 = "ttir.relu"(%6895, %6896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6898 = tensor.empty() : tensor<1x64xf32>
+    %6899 = "ttir.relu"(%6897, %6898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6900 = tensor.empty() : tensor<1x64xf32>
+    %6901 = "ttir.relu"(%6899, %6900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6902 = tensor.empty() : tensor<1x64xf32>
+    %6903 = "ttir.relu"(%6901, %6902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6904 = tensor.empty() : tensor<1x64xf32>
+    %6905 = "ttir.relu"(%6903, %6904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6906 = tensor.empty() : tensor<1x64xf32>
+    %6907 = "ttir.relu"(%6905, %6906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6908 = tensor.empty() : tensor<1x64xf32>
+    %6909 = "ttir.relu"(%6907, %6908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6910 = tensor.empty() : tensor<1x64xf32>
+    %6911 = "ttir.relu"(%6909, %6910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6912 = tensor.empty() : tensor<1x64xf32>
+    %6913 = "ttir.relu"(%6911, %6912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6914 = tensor.empty() : tensor<1x64xf32>
+    %6915 = "ttir.relu"(%6913, %6914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6916 = tensor.empty() : tensor<1x64xf32>
+    %6917 = "ttir.relu"(%6915, %6916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6918 = tensor.empty() : tensor<1x64xf32>
+    %6919 = "ttir.relu"(%6917, %6918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6920 = tensor.empty() : tensor<1x64xf32>
+    %6921 = "ttir.relu"(%6919, %6920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6922 = tensor.empty() : tensor<1x64xf32>
+    %6923 = "ttir.relu"(%6921, %6922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6924 = tensor.empty() : tensor<1x64xf32>
+    %6925 = "ttir.relu"(%6923, %6924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6926 = tensor.empty() : tensor<1x64xf32>
+    %6927 = "ttir.relu"(%6925, %6926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6928 = tensor.empty() : tensor<1x64xf32>
+    %6929 = "ttir.relu"(%6927, %6928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6930 = tensor.empty() : tensor<1x64xf32>
+    %6931 = "ttir.relu"(%6929, %6930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6932 = tensor.empty() : tensor<1x64xf32>
+    %6933 = "ttir.relu"(%6931, %6932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6934 = tensor.empty() : tensor<1x64xf32>
+    %6935 = "ttir.relu"(%6933, %6934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6936 = tensor.empty() : tensor<1x64xf32>
+    %6937 = "ttir.relu"(%6935, %6936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6938 = tensor.empty() : tensor<1x64xf32>
+    %6939 = "ttir.relu"(%6937, %6938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6940 = tensor.empty() : tensor<1x64xf32>
+    %6941 = "ttir.relu"(%6939, %6940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6942 = tensor.empty() : tensor<1x64xf32>
+    %6943 = "ttir.relu"(%6941, %6942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6944 = tensor.empty() : tensor<1x64xf32>
+    %6945 = "ttir.relu"(%6943, %6944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6946 = tensor.empty() : tensor<1x64xf32>
+    %6947 = "ttir.relu"(%6945, %6946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6948 = tensor.empty() : tensor<1x64xf32>
+    %6949 = "ttir.relu"(%6947, %6948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6950 = tensor.empty() : tensor<1x64xf32>
+    %6951 = "ttir.relu"(%6949, %6950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6952 = tensor.empty() : tensor<1x64xf32>
+    %6953 = "ttir.relu"(%6951, %6952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6954 = tensor.empty() : tensor<1x64xf32>
+    %6955 = "ttir.relu"(%6953, %6954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6956 = tensor.empty() : tensor<1x64xf32>
+    %6957 = "ttir.relu"(%6955, %6956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6958 = tensor.empty() : tensor<1x64xf32>
+    %6959 = "ttir.relu"(%6957, %6958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6960 = tensor.empty() : tensor<1x64xf32>
+    %6961 = "ttir.relu"(%6959, %6960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6962 = tensor.empty() : tensor<1x64xf32>
+    %6963 = "ttir.relu"(%6961, %6962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6964 = tensor.empty() : tensor<1x64xf32>
+    %6965 = "ttir.relu"(%6963, %6964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6966 = tensor.empty() : tensor<1x64xf32>
+    %6967 = "ttir.relu"(%6965, %6966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6968 = tensor.empty() : tensor<1x64xf32>
+    %6969 = "ttir.relu"(%6967, %6968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6970 = tensor.empty() : tensor<1x64xf32>
+    %6971 = "ttir.relu"(%6969, %6970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6972 = tensor.empty() : tensor<1x64xf32>
+    %6973 = "ttir.relu"(%6971, %6972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6974 = tensor.empty() : tensor<1x64xf32>
+    %6975 = "ttir.relu"(%6973, %6974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6976 = tensor.empty() : tensor<1x64xf32>
+    %6977 = "ttir.relu"(%6975, %6976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6978 = tensor.empty() : tensor<1x64xf32>
+    %6979 = "ttir.relu"(%6977, %6978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6980 = tensor.empty() : tensor<1x64xf32>
+    %6981 = "ttir.relu"(%6979, %6980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6982 = tensor.empty() : tensor<1x64xf32>
+    %6983 = "ttir.relu"(%6981, %6982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6984 = tensor.empty() : tensor<1x64xf32>
+    %6985 = "ttir.relu"(%6983, %6984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6986 = tensor.empty() : tensor<1x64xf32>
+    %6987 = "ttir.relu"(%6985, %6986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6988 = tensor.empty() : tensor<1x64xf32>
+    %6989 = "ttir.relu"(%6987, %6988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6990 = tensor.empty() : tensor<1x64xf32>
+    %6991 = "ttir.relu"(%6989, %6990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6992 = tensor.empty() : tensor<1x64xf32>
+    %6993 = "ttir.relu"(%6991, %6992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6994 = tensor.empty() : tensor<1x64xf32>
+    %6995 = "ttir.relu"(%6993, %6994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6996 = tensor.empty() : tensor<1x64xf32>
+    %6997 = "ttir.relu"(%6995, %6996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6998 = tensor.empty() : tensor<1x64xf32>
+    %6999 = "ttir.relu"(%6997, %6998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7000 = tensor.empty() : tensor<1x64xf32>
+    %7001 = "ttir.relu"(%6999, %7000) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7002 = tensor.empty() : tensor<1x64xf32>
+    %7003 = "ttir.relu"(%7001, %7002) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7004 = tensor.empty() : tensor<1x64xf32>
+    %7005 = "ttir.relu"(%7003, %7004) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7006 = tensor.empty() : tensor<1x64xf32>
+    %7007 = "ttir.relu"(%7005, %7006) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7008 = tensor.empty() : tensor<1x64xf32>
+    %7009 = "ttir.relu"(%7007, %7008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7010 = tensor.empty() : tensor<1x64xf32>
+    %7011 = "ttir.relu"(%7009, %7010) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7012 = tensor.empty() : tensor<1x64xf32>
+    %7013 = "ttir.relu"(%7011, %7012) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7014 = tensor.empty() : tensor<1x64xf32>
+    %7015 = "ttir.relu"(%7013, %7014) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7016 = tensor.empty() : tensor<1x64xf32>
+    %7017 = "ttir.relu"(%7015, %7016) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7018 = tensor.empty() : tensor<1x64xf32>
+    %7019 = "ttir.relu"(%7017, %7018) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7020 = tensor.empty() : tensor<1x64xf32>
+    %7021 = "ttir.relu"(%7019, %7020) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7022 = tensor.empty() : tensor<1x64xf32>
+    %7023 = "ttir.relu"(%7021, %7022) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7024 = tensor.empty() : tensor<1x64xf32>
+    %7025 = "ttir.relu"(%7023, %7024) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7026 = tensor.empty() : tensor<1x64xf32>
+    %7027 = "ttir.relu"(%7025, %7026) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7028 = tensor.empty() : tensor<1x64xf32>
+    %7029 = "ttir.relu"(%7027, %7028) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7030 = tensor.empty() : tensor<1x64xf32>
+    %7031 = "ttir.relu"(%7029, %7030) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7032 = tensor.empty() : tensor<1x64xf32>
+    %7033 = "ttir.relu"(%7031, %7032) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7034 = tensor.empty() : tensor<1x64xf32>
+    %7035 = "ttir.relu"(%7033, %7034) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7036 = tensor.empty() : tensor<1x64xf32>
+    %7037 = "ttir.relu"(%7035, %7036) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7038 = tensor.empty() : tensor<1x64xf32>
+    %7039 = "ttir.relu"(%7037, %7038) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7040 = tensor.empty() : tensor<1x64xf32>
+    %7041 = "ttir.relu"(%7039, %7040) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7042 = tensor.empty() : tensor<1x64xf32>
+    %7043 = "ttir.relu"(%7041, %7042) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7044 = tensor.empty() : tensor<1x64xf32>
+    %7045 = "ttir.relu"(%7043, %7044) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7046 = tensor.empty() : tensor<1x64xf32>
+    %7047 = "ttir.relu"(%7045, %7046) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7048 = tensor.empty() : tensor<1x64xf32>
+    %7049 = "ttir.relu"(%7047, %7048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7050 = tensor.empty() : tensor<1x64xf32>
+    %7051 = "ttir.relu"(%7049, %7050) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7052 = tensor.empty() : tensor<1x64xf32>
+    %7053 = "ttir.relu"(%7051, %7052) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7054 = tensor.empty() : tensor<1x64xf32>
+    %7055 = "ttir.relu"(%7053, %7054) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7056 = tensor.empty() : tensor<1x64xf32>
+    %7057 = "ttir.relu"(%7055, %7056) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7058 = tensor.empty() : tensor<1x64xf32>
+    %7059 = "ttir.relu"(%7057, %7058) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7060 = tensor.empty() : tensor<1x64xf32>
+    %7061 = "ttir.relu"(%7059, %7060) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7062 = tensor.empty() : tensor<1x64xf32>
+    %7063 = "ttir.relu"(%7061, %7062) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7064 = tensor.empty() : tensor<1x64xf32>
+    %7065 = "ttir.relu"(%7063, %7064) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7066 = tensor.empty() : tensor<1x64xf32>
+    %7067 = "ttir.relu"(%7065, %7066) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7068 = tensor.empty() : tensor<1x64xf32>
+    %7069 = "ttir.relu"(%7067, %7068) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7070 = tensor.empty() : tensor<1x64xf32>
+    %7071 = "ttir.relu"(%7069, %7070) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7072 = tensor.empty() : tensor<1x64xf32>
+    %7073 = "ttir.relu"(%7071, %7072) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7074 = tensor.empty() : tensor<1x64xf32>
+    %7075 = "ttir.relu"(%7073, %7074) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7076 = tensor.empty() : tensor<1x64xf32>
+    %7077 = "ttir.relu"(%7075, %7076) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7078 = tensor.empty() : tensor<1x64xf32>
+    %7079 = "ttir.relu"(%7077, %7078) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7080 = tensor.empty() : tensor<1x64xf32>
+    %7081 = "ttir.relu"(%7079, %7080) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7082 = tensor.empty() : tensor<1x64xf32>
+    %7083 = "ttir.relu"(%7081, %7082) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7084 = tensor.empty() : tensor<1x64xf32>
+    %7085 = "ttir.relu"(%7083, %7084) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7086 = tensor.empty() : tensor<1x64xf32>
+    %7087 = "ttir.relu"(%7085, %7086) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7088 = tensor.empty() : tensor<1x64xf32>
+    %7089 = "ttir.relu"(%7087, %7088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7090 = tensor.empty() : tensor<1x64xf32>
+    %7091 = "ttir.relu"(%7089, %7090) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7092 = tensor.empty() : tensor<1x64xf32>
+    %7093 = "ttir.relu"(%7091, %7092) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7094 = tensor.empty() : tensor<1x64xf32>
+    %7095 = "ttir.relu"(%7093, %7094) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7096 = tensor.empty() : tensor<1x64xf32>
+    %7097 = "ttir.relu"(%7095, %7096) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7098 = tensor.empty() : tensor<1x64xf32>
+    %7099 = "ttir.relu"(%7097, %7098) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7100 = tensor.empty() : tensor<1x64xf32>
+    %7101 = "ttir.relu"(%7099, %7100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7102 = tensor.empty() : tensor<1x64xf32>
+    %7103 = "ttir.relu"(%7101, %7102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7104 = tensor.empty() : tensor<1x64xf32>
+    %7105 = "ttir.relu"(%7103, %7104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7106 = tensor.empty() : tensor<1x64xf32>
+    %7107 = "ttir.relu"(%7105, %7106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7108 = tensor.empty() : tensor<1x64xf32>
+    %7109 = "ttir.relu"(%7107, %7108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7110 = tensor.empty() : tensor<1x64xf32>
+    %7111 = "ttir.relu"(%7109, %7110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7112 = tensor.empty() : tensor<1x64xf32>
+    %7113 = "ttir.relu"(%7111, %7112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7114 = tensor.empty() : tensor<1x64xf32>
+    %7115 = "ttir.relu"(%7113, %7114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7116 = tensor.empty() : tensor<1x64xf32>
+    %7117 = "ttir.relu"(%7115, %7116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7118 = tensor.empty() : tensor<1x64xf32>
+    %7119 = "ttir.relu"(%7117, %7118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7120 = tensor.empty() : tensor<1x64xf32>
+    %7121 = "ttir.relu"(%7119, %7120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7122 = tensor.empty() : tensor<1x64xf32>
+    %7123 = "ttir.relu"(%7121, %7122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7124 = tensor.empty() : tensor<1x64xf32>
+    %7125 = "ttir.relu"(%7123, %7124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7126 = tensor.empty() : tensor<1x64xf32>
+    %7127 = "ttir.relu"(%7125, %7126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7128 = tensor.empty() : tensor<1x64xf32>
+    %7129 = "ttir.relu"(%7127, %7128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7130 = tensor.empty() : tensor<1x64xf32>
+    %7131 = "ttir.relu"(%7129, %7130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7132 = tensor.empty() : tensor<1x64xf32>
+    %7133 = "ttir.relu"(%7131, %7132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7134 = tensor.empty() : tensor<1x64xf32>
+    %7135 = "ttir.relu"(%7133, %7134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7136 = tensor.empty() : tensor<1x64xf32>
+    %7137 = "ttir.relu"(%7135, %7136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7138 = tensor.empty() : tensor<1x64xf32>
+    %7139 = "ttir.relu"(%7137, %7138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7140 = tensor.empty() : tensor<1x64xf32>
+    %7141 = "ttir.relu"(%7139, %7140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7142 = tensor.empty() : tensor<1x64xf32>
+    %7143 = "ttir.relu"(%7141, %7142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7144 = tensor.empty() : tensor<1x64xf32>
+    %7145 = "ttir.relu"(%7143, %7144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7146 = tensor.empty() : tensor<1x64xf32>
+    %7147 = "ttir.relu"(%7145, %7146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7148 = tensor.empty() : tensor<1x64xf32>
+    %7149 = "ttir.relu"(%7147, %7148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7150 = tensor.empty() : tensor<1x64xf32>
+    %7151 = "ttir.relu"(%7149, %7150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7152 = tensor.empty() : tensor<1x64xf32>
+    %7153 = "ttir.relu"(%7151, %7152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7154 = tensor.empty() : tensor<1x64xf32>
+    %7155 = "ttir.relu"(%7153, %7154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7156 = tensor.empty() : tensor<1x64xf32>
+    %7157 = "ttir.relu"(%7155, %7156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7158 = tensor.empty() : tensor<1x64xf32>
+    %7159 = "ttir.relu"(%7157, %7158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7160 = tensor.empty() : tensor<1x64xf32>
+    %7161 = "ttir.relu"(%7159, %7160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7162 = tensor.empty() : tensor<1x64xf32>
+    %7163 = "ttir.relu"(%7161, %7162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7164 = tensor.empty() : tensor<1x64xf32>
+    %7165 = "ttir.relu"(%7163, %7164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7166 = tensor.empty() : tensor<1x64xf32>
+    %7167 = "ttir.relu"(%7165, %7166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7168 = tensor.empty() : tensor<1x64xf32>
+    %7169 = "ttir.relu"(%7167, %7168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7170 = tensor.empty() : tensor<1x64xf32>
+    %7171 = "ttir.relu"(%7169, %7170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7172 = tensor.empty() : tensor<1x64xf32>
+    %7173 = "ttir.relu"(%7171, %7172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7174 = tensor.empty() : tensor<1x64xf32>
+    %7175 = "ttir.relu"(%7173, %7174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7176 = tensor.empty() : tensor<1x64xf32>
+    %7177 = "ttir.relu"(%7175, %7176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7178 = tensor.empty() : tensor<1x64xf32>
+    %7179 = "ttir.relu"(%7177, %7178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7180 = tensor.empty() : tensor<1x64xf32>
+    %7181 = "ttir.relu"(%7179, %7180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7182 = tensor.empty() : tensor<1x64xf32>
+    %7183 = "ttir.relu"(%7181, %7182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7184 = tensor.empty() : tensor<1x64xf32>
+    %7185 = "ttir.relu"(%7183, %7184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7186 = tensor.empty() : tensor<1x64xf32>
+    %7187 = "ttir.relu"(%7185, %7186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7188 = tensor.empty() : tensor<1x64xf32>
+    %7189 = "ttir.relu"(%7187, %7188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7190 = tensor.empty() : tensor<1x64xf32>
+    %7191 = "ttir.relu"(%7189, %7190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7192 = tensor.empty() : tensor<1x64xf32>
+    %7193 = "ttir.relu"(%7191, %7192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7194 = tensor.empty() : tensor<1x64xf32>
+    %7195 = "ttir.relu"(%7193, %7194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7196 = tensor.empty() : tensor<1x64xf32>
+    %7197 = "ttir.relu"(%7195, %7196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7198 = tensor.empty() : tensor<1x64xf32>
+    %7199 = "ttir.relu"(%7197, %7198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7200 = tensor.empty() : tensor<1x64xf32>
+    %7201 = "ttir.relu"(%7199, %7200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7202 = tensor.empty() : tensor<1x64xf32>
+    %7203 = "ttir.relu"(%7201, %7202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7204 = tensor.empty() : tensor<1x64xf32>
+    %7205 = "ttir.relu"(%7203, %7204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7206 = tensor.empty() : tensor<1x64xf32>
+    %7207 = "ttir.relu"(%7205, %7206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7208 = tensor.empty() : tensor<1x64xf32>
+    %7209 = "ttir.relu"(%7207, %7208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7210 = tensor.empty() : tensor<1x64xf32>
+    %7211 = "ttir.relu"(%7209, %7210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7212 = tensor.empty() : tensor<1x64xf32>
+    %7213 = "ttir.relu"(%7211, %7212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7214 = tensor.empty() : tensor<1x64xf32>
+    %7215 = "ttir.relu"(%7213, %7214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7216 = tensor.empty() : tensor<1x64xf32>
+    %7217 = "ttir.relu"(%7215, %7216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7218 = tensor.empty() : tensor<1x64xf32>
+    %7219 = "ttir.relu"(%7217, %7218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7220 = tensor.empty() : tensor<1x64xf32>
+    %7221 = "ttir.relu"(%7219, %7220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7222 = tensor.empty() : tensor<1x64xf32>
+    %7223 = "ttir.relu"(%7221, %7222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7224 = tensor.empty() : tensor<1x64xf32>
+    %7225 = "ttir.relu"(%7223, %7224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7226 = tensor.empty() : tensor<1x64xf32>
+    %7227 = "ttir.relu"(%7225, %7226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7228 = tensor.empty() : tensor<1x64xf32>
+    %7229 = "ttir.relu"(%7227, %7228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7230 = tensor.empty() : tensor<1x64xf32>
+    %7231 = "ttir.relu"(%7229, %7230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7232 = tensor.empty() : tensor<1x64xf32>
+    %7233 = "ttir.relu"(%7231, %7232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7234 = tensor.empty() : tensor<1x64xf32>
+    %7235 = "ttir.relu"(%7233, %7234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7236 = tensor.empty() : tensor<1x64xf32>
+    %7237 = "ttir.relu"(%7235, %7236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7238 = tensor.empty() : tensor<1x64xf32>
+    %7239 = "ttir.relu"(%7237, %7238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7240 = tensor.empty() : tensor<1x64xf32>
+    %7241 = "ttir.relu"(%7239, %7240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7242 = tensor.empty() : tensor<1x64xf32>
+    %7243 = "ttir.relu"(%7241, %7242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7244 = tensor.empty() : tensor<1x64xf32>
+    %7245 = "ttir.relu"(%7243, %7244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7246 = tensor.empty() : tensor<1x64xf32>
+    %7247 = "ttir.relu"(%7245, %7246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7248 = tensor.empty() : tensor<1x64xf32>
+    %7249 = "ttir.relu"(%7247, %7248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7250 = tensor.empty() : tensor<1x64xf32>
+    %7251 = "ttir.relu"(%7249, %7250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7252 = tensor.empty() : tensor<1x64xf32>
+    %7253 = "ttir.relu"(%7251, %7252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7254 = tensor.empty() : tensor<1x64xf32>
+    %7255 = "ttir.relu"(%7253, %7254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7256 = tensor.empty() : tensor<1x64xf32>
+    %7257 = "ttir.relu"(%7255, %7256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7258 = tensor.empty() : tensor<1x64xf32>
+    %7259 = "ttir.relu"(%7257, %7258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7260 = tensor.empty() : tensor<1x64xf32>
+    %7261 = "ttir.relu"(%7259, %7260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7262 = tensor.empty() : tensor<1x64xf32>
+    %7263 = "ttir.relu"(%7261, %7262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7264 = tensor.empty() : tensor<1x64xf32>
+    %7265 = "ttir.relu"(%7263, %7264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7266 = tensor.empty() : tensor<1x64xf32>
+    %7267 = "ttir.relu"(%7265, %7266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7268 = tensor.empty() : tensor<1x64xf32>
+    %7269 = "ttir.relu"(%7267, %7268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7270 = tensor.empty() : tensor<1x64xf32>
+    %7271 = "ttir.relu"(%7269, %7270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7272 = tensor.empty() : tensor<1x64xf32>
+    %7273 = "ttir.relu"(%7271, %7272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7274 = tensor.empty() : tensor<1x64xf32>
+    %7275 = "ttir.relu"(%7273, %7274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7276 = tensor.empty() : tensor<1x64xf32>
+    %7277 = "ttir.relu"(%7275, %7276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7278 = tensor.empty() : tensor<1x64xf32>
+    %7279 = "ttir.relu"(%7277, %7278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7280 = tensor.empty() : tensor<1x64xf32>
+    %7281 = "ttir.relu"(%7279, %7280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7282 = tensor.empty() : tensor<1x64xf32>
+    %7283 = "ttir.relu"(%7281, %7282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7284 = tensor.empty() : tensor<1x64xf32>
+    %7285 = "ttir.relu"(%7283, %7284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7286 = tensor.empty() : tensor<1x64xf32>
+    %7287 = "ttir.relu"(%7285, %7286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7288 = tensor.empty() : tensor<1x64xf32>
+    %7289 = "ttir.relu"(%7287, %7288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7290 = tensor.empty() : tensor<1x64xf32>
+    %7291 = "ttir.relu"(%7289, %7290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7292 = tensor.empty() : tensor<1x64xf32>
+    %7293 = "ttir.relu"(%7291, %7292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7294 = tensor.empty() : tensor<1x64xf32>
+    %7295 = "ttir.relu"(%7293, %7294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7296 = tensor.empty() : tensor<1x64xf32>
+    %7297 = "ttir.relu"(%7295, %7296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7298 = tensor.empty() : tensor<1x64xf32>
+    %7299 = "ttir.relu"(%7297, %7298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7300 = tensor.empty() : tensor<1x64xf32>
+    %7301 = "ttir.relu"(%7299, %7300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7302 = tensor.empty() : tensor<1x64xf32>
+    %7303 = "ttir.relu"(%7301, %7302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7304 = tensor.empty() : tensor<1x64xf32>
+    %7305 = "ttir.relu"(%7303, %7304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7306 = tensor.empty() : tensor<1x64xf32>
+    %7307 = "ttir.relu"(%7305, %7306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7308 = tensor.empty() : tensor<1x64xf32>
+    %7309 = "ttir.relu"(%7307, %7308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7310 = tensor.empty() : tensor<1x64xf32>
+    %7311 = "ttir.relu"(%7309, %7310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7312 = tensor.empty() : tensor<1x64xf32>
+    %7313 = "ttir.relu"(%7311, %7312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7314 = tensor.empty() : tensor<1x64xf32>
+    %7315 = "ttir.relu"(%7313, %7314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7316 = tensor.empty() : tensor<1x64xf32>
+    %7317 = "ttir.relu"(%7315, %7316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7318 = tensor.empty() : tensor<1x64xf32>
+    %7319 = "ttir.relu"(%7317, %7318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7320 = tensor.empty() : tensor<1x64xf32>
+    %7321 = "ttir.relu"(%7319, %7320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7322 = tensor.empty() : tensor<1x64xf32>
+    %7323 = "ttir.relu"(%7321, %7322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7324 = tensor.empty() : tensor<1x64xf32>
+    %7325 = "ttir.relu"(%7323, %7324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7326 = tensor.empty() : tensor<1x64xf32>
+    %7327 = "ttir.relu"(%7325, %7326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7328 = tensor.empty() : tensor<1x64xf32>
+    %7329 = "ttir.relu"(%7327, %7328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7330 = tensor.empty() : tensor<1x64xf32>
+    %7331 = "ttir.relu"(%7329, %7330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7332 = tensor.empty() : tensor<1x64xf32>
+    %7333 = "ttir.relu"(%7331, %7332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7334 = tensor.empty() : tensor<1x64xf32>
+    %7335 = "ttir.relu"(%7333, %7334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7336 = tensor.empty() : tensor<1x64xf32>
+    %7337 = "ttir.relu"(%7335, %7336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7338 = tensor.empty() : tensor<1x64xf32>
+    %7339 = "ttir.relu"(%7337, %7338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7340 = tensor.empty() : tensor<1x64xf32>
+    %7341 = "ttir.relu"(%7339, %7340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7342 = tensor.empty() : tensor<1x64xf32>
+    %7343 = "ttir.relu"(%7341, %7342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7344 = tensor.empty() : tensor<1x64xf32>
+    %7345 = "ttir.relu"(%7343, %7344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7346 = tensor.empty() : tensor<1x64xf32>
+    %7347 = "ttir.relu"(%7345, %7346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7348 = tensor.empty() : tensor<1x64xf32>
+    %7349 = "ttir.relu"(%7347, %7348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7350 = tensor.empty() : tensor<1x64xf32>
+    %7351 = "ttir.relu"(%7349, %7350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7352 = tensor.empty() : tensor<1x64xf32>
+    %7353 = "ttir.relu"(%7351, %7352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7354 = tensor.empty() : tensor<1x64xf32>
+    %7355 = "ttir.relu"(%7353, %7354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7356 = tensor.empty() : tensor<1x64xf32>
+    %7357 = "ttir.relu"(%7355, %7356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7358 = tensor.empty() : tensor<1x64xf32>
+    %7359 = "ttir.relu"(%7357, %7358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7360 = tensor.empty() : tensor<1x64xf32>
+    %7361 = "ttir.relu"(%7359, %7360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7362 = tensor.empty() : tensor<1x64xf32>
+    %7363 = "ttir.relu"(%7361, %7362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7364 = tensor.empty() : tensor<1x64xf32>
+    %7365 = "ttir.relu"(%7363, %7364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7366 = tensor.empty() : tensor<1x64xf32>
+    %7367 = "ttir.relu"(%7365, %7366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7368 = tensor.empty() : tensor<1x64xf32>
+    %7369 = "ttir.relu"(%7367, %7368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7370 = tensor.empty() : tensor<1x64xf32>
+    %7371 = "ttir.relu"(%7369, %7370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7372 = tensor.empty() : tensor<1x64xf32>
+    %7373 = "ttir.relu"(%7371, %7372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7374 = tensor.empty() : tensor<1x64xf32>
+    %7375 = "ttir.relu"(%7373, %7374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7376 = tensor.empty() : tensor<1x64xf32>
+    %7377 = "ttir.relu"(%7375, %7376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7378 = tensor.empty() : tensor<1x64xf32>
+    %7379 = "ttir.relu"(%7377, %7378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7380 = tensor.empty() : tensor<1x64xf32>
+    %7381 = "ttir.relu"(%7379, %7380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7382 = tensor.empty() : tensor<1x64xf32>
+    %7383 = "ttir.relu"(%7381, %7382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7384 = tensor.empty() : tensor<1x64xf32>
+    %7385 = "ttir.relu"(%7383, %7384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7386 = tensor.empty() : tensor<1x64xf32>
+    %7387 = "ttir.relu"(%7385, %7386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7388 = tensor.empty() : tensor<1x64xf32>
+    %7389 = "ttir.relu"(%7387, %7388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7390 = tensor.empty() : tensor<1x64xf32>
+    %7391 = "ttir.relu"(%7389, %7390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7392 = tensor.empty() : tensor<1x64xf32>
+    %7393 = "ttir.relu"(%7391, %7392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7394 = tensor.empty() : tensor<1x64xf32>
+    %7395 = "ttir.relu"(%7393, %7394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7396 = tensor.empty() : tensor<1x64xf32>
+    %7397 = "ttir.relu"(%7395, %7396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7398 = tensor.empty() : tensor<1x64xf32>
+    %7399 = "ttir.relu"(%7397, %7398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7400 = tensor.empty() : tensor<1x64xf32>
+    %7401 = "ttir.relu"(%7399, %7400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7402 = tensor.empty() : tensor<1x64xf32>
+    %7403 = "ttir.relu"(%7401, %7402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7404 = tensor.empty() : tensor<1x64xf32>
+    %7405 = "ttir.relu"(%7403, %7404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7406 = tensor.empty() : tensor<1x64xf32>
+    %7407 = "ttir.relu"(%7405, %7406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7408 = tensor.empty() : tensor<1x64xf32>
+    %7409 = "ttir.relu"(%7407, %7408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7410 = tensor.empty() : tensor<1x64xf32>
+    %7411 = "ttir.relu"(%7409, %7410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7412 = tensor.empty() : tensor<1x64xf32>
+    %7413 = "ttir.relu"(%7411, %7412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7414 = tensor.empty() : tensor<1x64xf32>
+    %7415 = "ttir.relu"(%7413, %7414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7416 = tensor.empty() : tensor<1x64xf32>
+    %7417 = "ttir.relu"(%7415, %7416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7418 = tensor.empty() : tensor<1x64xf32>
+    %7419 = "ttir.relu"(%7417, %7418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7420 = tensor.empty() : tensor<1x64xf32>
+    %7421 = "ttir.relu"(%7419, %7420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7422 = tensor.empty() : tensor<1x64xf32>
+    %7423 = "ttir.relu"(%7421, %7422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7424 = tensor.empty() : tensor<1x64xf32>
+    %7425 = "ttir.relu"(%7423, %7424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7426 = tensor.empty() : tensor<1x64xf32>
+    %7427 = "ttir.relu"(%7425, %7426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7428 = tensor.empty() : tensor<1x64xf32>
+    %7429 = "ttir.relu"(%7427, %7428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7430 = tensor.empty() : tensor<1x64xf32>
+    %7431 = "ttir.relu"(%7429, %7430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7432 = tensor.empty() : tensor<1x64xf32>
+    %7433 = "ttir.relu"(%7431, %7432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7434 = tensor.empty() : tensor<1x64xf32>
+    %7435 = "ttir.relu"(%7433, %7434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7436 = tensor.empty() : tensor<1x64xf32>
+    %7437 = "ttir.relu"(%7435, %7436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7438 = tensor.empty() : tensor<1x64xf32>
+    %7439 = "ttir.relu"(%7437, %7438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7440 = tensor.empty() : tensor<1x64xf32>
+    %7441 = "ttir.relu"(%7439, %7440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7442 = tensor.empty() : tensor<1x64xf32>
+    %7443 = "ttir.relu"(%7441, %7442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7444 = tensor.empty() : tensor<1x64xf32>
+    %7445 = "ttir.relu"(%7443, %7444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7446 = tensor.empty() : tensor<1x64xf32>
+    %7447 = "ttir.relu"(%7445, %7446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7448 = tensor.empty() : tensor<1x64xf32>
+    %7449 = "ttir.relu"(%7447, %7448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7450 = tensor.empty() : tensor<1x64xf32>
+    %7451 = "ttir.relu"(%7449, %7450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7452 = tensor.empty() : tensor<1x64xf32>
+    %7453 = "ttir.relu"(%7451, %7452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7454 = tensor.empty() : tensor<1x64xf32>
+    %7455 = "ttir.relu"(%7453, %7454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7456 = tensor.empty() : tensor<1x64xf32>
+    %7457 = "ttir.relu"(%7455, %7456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7458 = tensor.empty() : tensor<1x64xf32>
+    %7459 = "ttir.relu"(%7457, %7458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7460 = tensor.empty() : tensor<1x64xf32>
+    %7461 = "ttir.relu"(%7459, %7460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7462 = tensor.empty() : tensor<1x64xf32>
+    %7463 = "ttir.relu"(%7461, %7462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7464 = tensor.empty() : tensor<1x64xf32>
+    %7465 = "ttir.relu"(%7463, %7464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7466 = tensor.empty() : tensor<1x64xf32>
+    %7467 = "ttir.relu"(%7465, %7466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7468 = tensor.empty() : tensor<1x64xf32>
+    %7469 = "ttir.relu"(%7467, %7468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7470 = tensor.empty() : tensor<1x64xf32>
+    %7471 = "ttir.relu"(%7469, %7470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7472 = tensor.empty() : tensor<1x64xf32>
+    %7473 = "ttir.relu"(%7471, %7472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7474 = tensor.empty() : tensor<1x64xf32>
+    %7475 = "ttir.relu"(%7473, %7474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7476 = tensor.empty() : tensor<1x64xf32>
+    %7477 = "ttir.relu"(%7475, %7476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7478 = tensor.empty() : tensor<1x64xf32>
+    %7479 = "ttir.relu"(%7477, %7478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7480 = tensor.empty() : tensor<1x64xf32>
+    %7481 = "ttir.relu"(%7479, %7480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7482 = tensor.empty() : tensor<1x64xf32>
+    %7483 = "ttir.relu"(%7481, %7482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7484 = tensor.empty() : tensor<1x64xf32>
+    %7485 = "ttir.relu"(%7483, %7484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7486 = tensor.empty() : tensor<1x64xf32>
+    %7487 = "ttir.relu"(%7485, %7486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7488 = tensor.empty() : tensor<1x64xf32>
+    %7489 = "ttir.relu"(%7487, %7488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7490 = tensor.empty() : tensor<1x64xf32>
+    %7491 = "ttir.relu"(%7489, %7490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7492 = tensor.empty() : tensor<1x64xf32>
+    %7493 = "ttir.relu"(%7491, %7492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7494 = tensor.empty() : tensor<1x64xf32>
+    %7495 = "ttir.relu"(%7493, %7494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7496 = tensor.empty() : tensor<1x64xf32>
+    %7497 = "ttir.relu"(%7495, %7496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7498 = tensor.empty() : tensor<1x64xf32>
+    %7499 = "ttir.relu"(%7497, %7498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7500 = tensor.empty() : tensor<1x64xf32>
+    %7501 = "ttir.relu"(%7499, %7500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7502 = tensor.empty() : tensor<1x64xf32>
+    %7503 = "ttir.relu"(%7501, %7502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7504 = tensor.empty() : tensor<1x64xf32>
+    %7505 = "ttir.relu"(%7503, %7504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7506 = tensor.empty() : tensor<1x64xf32>
+    %7507 = "ttir.relu"(%7505, %7506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7508 = tensor.empty() : tensor<1x64xf32>
+    %7509 = "ttir.relu"(%7507, %7508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7510 = tensor.empty() : tensor<1x64xf32>
+    %7511 = "ttir.relu"(%7509, %7510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7512 = tensor.empty() : tensor<1x64xf32>
+    %7513 = "ttir.relu"(%7511, %7512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7514 = tensor.empty() : tensor<1x64xf32>
+    %7515 = "ttir.relu"(%7513, %7514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7516 = tensor.empty() : tensor<1x64xf32>
+    %7517 = "ttir.relu"(%7515, %7516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7518 = tensor.empty() : tensor<1x64xf32>
+    %7519 = "ttir.relu"(%7517, %7518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7520 = tensor.empty() : tensor<1x64xf32>
+    %7521 = "ttir.relu"(%7519, %7520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7522 = tensor.empty() : tensor<1x64xf32>
+    %7523 = "ttir.relu"(%7521, %7522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7524 = tensor.empty() : tensor<1x64xf32>
+    %7525 = "ttir.relu"(%7523, %7524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7526 = tensor.empty() : tensor<1x64xf32>
+    %7527 = "ttir.relu"(%7525, %7526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7528 = tensor.empty() : tensor<1x64xf32>
+    %7529 = "ttir.relu"(%7527, %7528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7530 = tensor.empty() : tensor<1x64xf32>
+    %7531 = "ttir.relu"(%7529, %7530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7532 = tensor.empty() : tensor<1x64xf32>
+    %7533 = "ttir.relu"(%7531, %7532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7534 = tensor.empty() : tensor<1x64xf32>
+    %7535 = "ttir.relu"(%7533, %7534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7536 = tensor.empty() : tensor<1x64xf32>
+    %7537 = "ttir.relu"(%7535, %7536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7538 = tensor.empty() : tensor<1x64xf32>
+    %7539 = "ttir.relu"(%7537, %7538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7540 = tensor.empty() : tensor<1x64xf32>
+    %7541 = "ttir.relu"(%7539, %7540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7542 = tensor.empty() : tensor<1x64xf32>
+    %7543 = "ttir.relu"(%7541, %7542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7544 = tensor.empty() : tensor<1x64xf32>
+    %7545 = "ttir.relu"(%7543, %7544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7546 = tensor.empty() : tensor<1x64xf32>
+    %7547 = "ttir.relu"(%7545, %7546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7548 = tensor.empty() : tensor<1x64xf32>
+    %7549 = "ttir.relu"(%7547, %7548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7550 = tensor.empty() : tensor<1x64xf32>
+    %7551 = "ttir.relu"(%7549, %7550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7552 = tensor.empty() : tensor<1x64xf32>
+    %7553 = "ttir.relu"(%7551, %7552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7554 = tensor.empty() : tensor<1x64xf32>
+    %7555 = "ttir.relu"(%7553, %7554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7556 = tensor.empty() : tensor<1x64xf32>
+    %7557 = "ttir.relu"(%7555, %7556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7558 = tensor.empty() : tensor<1x64xf32>
+    %7559 = "ttir.relu"(%7557, %7558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7560 = tensor.empty() : tensor<1x64xf32>
+    %7561 = "ttir.relu"(%7559, %7560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7562 = tensor.empty() : tensor<1x64xf32>
+    %7563 = "ttir.relu"(%7561, %7562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7564 = tensor.empty() : tensor<1x64xf32>
+    %7565 = "ttir.relu"(%7563, %7564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7566 = tensor.empty() : tensor<1x64xf32>
+    %7567 = "ttir.relu"(%7565, %7566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7568 = tensor.empty() : tensor<1x64xf32>
+    %7569 = "ttir.relu"(%7567, %7568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7570 = tensor.empty() : tensor<1x64xf32>
+    %7571 = "ttir.relu"(%7569, %7570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7572 = tensor.empty() : tensor<1x64xf32>
+    %7573 = "ttir.relu"(%7571, %7572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7574 = tensor.empty() : tensor<1x64xf32>
+    %7575 = "ttir.relu"(%7573, %7574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7576 = tensor.empty() : tensor<1x64xf32>
+    %7577 = "ttir.relu"(%7575, %7576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7578 = tensor.empty() : tensor<1x64xf32>
+    %7579 = "ttir.relu"(%7577, %7578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7580 = tensor.empty() : tensor<1x64xf32>
+    %7581 = "ttir.relu"(%7579, %7580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7582 = tensor.empty() : tensor<1x64xf32>
+    %7583 = "ttir.relu"(%7581, %7582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7584 = tensor.empty() : tensor<1x64xf32>
+    %7585 = "ttir.relu"(%7583, %7584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7586 = tensor.empty() : tensor<1x64xf32>
+    %7587 = "ttir.relu"(%7585, %7586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7588 = tensor.empty() : tensor<1x64xf32>
+    %7589 = "ttir.relu"(%7587, %7588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7590 = tensor.empty() : tensor<1x64xf32>
+    %7591 = "ttir.relu"(%7589, %7590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7592 = tensor.empty() : tensor<1x64xf32>
+    %7593 = "ttir.relu"(%7591, %7592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7594 = tensor.empty() : tensor<1x64xf32>
+    %7595 = "ttir.relu"(%7593, %7594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7596 = tensor.empty() : tensor<1x64xf32>
+    %7597 = "ttir.relu"(%7595, %7596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7598 = tensor.empty() : tensor<1x64xf32>
+    %7599 = "ttir.relu"(%7597, %7598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7600 = tensor.empty() : tensor<1x64xf32>
+    %7601 = "ttir.relu"(%7599, %7600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7602 = tensor.empty() : tensor<1x64xf32>
+    %7603 = "ttir.relu"(%7601, %7602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7604 = tensor.empty() : tensor<1x64xf32>
+    %7605 = "ttir.relu"(%7603, %7604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7606 = tensor.empty() : tensor<1x64xf32>
+    %7607 = "ttir.relu"(%7605, %7606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7608 = tensor.empty() : tensor<1x64xf32>
+    %7609 = "ttir.relu"(%7607, %7608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7610 = tensor.empty() : tensor<1x64xf32>
+    %7611 = "ttir.relu"(%7609, %7610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7612 = tensor.empty() : tensor<1x64xf32>
+    %7613 = "ttir.relu"(%7611, %7612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7614 = tensor.empty() : tensor<1x64xf32>
+    %7615 = "ttir.relu"(%7613, %7614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7616 = tensor.empty() : tensor<1x64xf32>
+    %7617 = "ttir.relu"(%7615, %7616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7618 = tensor.empty() : tensor<1x64xf32>
+    %7619 = "ttir.relu"(%7617, %7618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7620 = tensor.empty() : tensor<1x64xf32>
+    %7621 = "ttir.relu"(%7619, %7620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7622 = tensor.empty() : tensor<1x64xf32>
+    %7623 = "ttir.relu"(%7621, %7622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7624 = tensor.empty() : tensor<1x64xf32>
+    %7625 = "ttir.relu"(%7623, %7624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7626 = tensor.empty() : tensor<1x64xf32>
+    %7627 = "ttir.relu"(%7625, %7626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7628 = tensor.empty() : tensor<1x64xf32>
+    %7629 = "ttir.relu"(%7627, %7628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7630 = tensor.empty() : tensor<1x64xf32>
+    %7631 = "ttir.relu"(%7629, %7630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7632 = tensor.empty() : tensor<1x64xf32>
+    %7633 = "ttir.relu"(%7631, %7632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7634 = tensor.empty() : tensor<1x64xf32>
+    %7635 = "ttir.relu"(%7633, %7634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7636 = tensor.empty() : tensor<1x64xf32>
+    %7637 = "ttir.relu"(%7635, %7636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7638 = tensor.empty() : tensor<1x64xf32>
+    %7639 = "ttir.relu"(%7637, %7638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7640 = tensor.empty() : tensor<1x64xf32>
+    %7641 = "ttir.relu"(%7639, %7640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7642 = tensor.empty() : tensor<1x64xf32>
+    %7643 = "ttir.relu"(%7641, %7642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7644 = tensor.empty() : tensor<1x64xf32>
+    %7645 = "ttir.relu"(%7643, %7644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7646 = tensor.empty() : tensor<1x64xf32>
+    %7647 = "ttir.relu"(%7645, %7646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7648 = tensor.empty() : tensor<1x64xf32>
+    %7649 = "ttir.relu"(%7647, %7648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7650 = tensor.empty() : tensor<1x64xf32>
+    %7651 = "ttir.relu"(%7649, %7650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7652 = tensor.empty() : tensor<1x64xf32>
+    %7653 = "ttir.relu"(%7651, %7652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7654 = tensor.empty() : tensor<1x64xf32>
+    %7655 = "ttir.relu"(%7653, %7654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7656 = tensor.empty() : tensor<1x64xf32>
+    %7657 = "ttir.relu"(%7655, %7656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7658 = tensor.empty() : tensor<1x64xf32>
+    %7659 = "ttir.relu"(%7657, %7658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7660 = tensor.empty() : tensor<1x64xf32>
+    %7661 = "ttir.relu"(%7659, %7660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7662 = tensor.empty() : tensor<1x64xf32>
+    %7663 = "ttir.relu"(%7661, %7662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7664 = tensor.empty() : tensor<1x64xf32>
+    %7665 = "ttir.relu"(%7663, %7664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7666 = tensor.empty() : tensor<1x64xf32>
+    %7667 = "ttir.relu"(%7665, %7666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7668 = tensor.empty() : tensor<1x64xf32>
+    %7669 = "ttir.relu"(%7667, %7668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7670 = tensor.empty() : tensor<1x64xf32>
+    %7671 = "ttir.relu"(%7669, %7670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7672 = tensor.empty() : tensor<1x64xf32>
+    %7673 = "ttir.relu"(%7671, %7672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7674 = tensor.empty() : tensor<1x64xf32>
+    %7675 = "ttir.relu"(%7673, %7674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7676 = tensor.empty() : tensor<1x64xf32>
+    %7677 = "ttir.relu"(%7675, %7676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7678 = tensor.empty() : tensor<1x64xf32>
+    %7679 = "ttir.relu"(%7677, %7678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7680 = tensor.empty() : tensor<1x64xf32>
+    %7681 = "ttir.relu"(%7679, %7680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7682 = tensor.empty() : tensor<1x64xf32>
+    %7683 = "ttir.relu"(%7681, %7682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7684 = tensor.empty() : tensor<1x64xf32>
+    %7685 = "ttir.relu"(%7683, %7684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7686 = tensor.empty() : tensor<1x64xf32>
+    %7687 = "ttir.relu"(%7685, %7686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7688 = tensor.empty() : tensor<1x64xf32>
+    %7689 = "ttir.relu"(%7687, %7688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7690 = tensor.empty() : tensor<1x64xf32>
+    %7691 = "ttir.relu"(%7689, %7690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7692 = tensor.empty() : tensor<1x64xf32>
+    %7693 = "ttir.relu"(%7691, %7692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7694 = tensor.empty() : tensor<1x64xf32>
+    %7695 = "ttir.relu"(%7693, %7694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7696 = tensor.empty() : tensor<1x64xf32>
+    %7697 = "ttir.relu"(%7695, %7696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7698 = tensor.empty() : tensor<1x64xf32>
+    %7699 = "ttir.relu"(%7697, %7698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7700 = tensor.empty() : tensor<1x64xf32>
+    %7701 = "ttir.relu"(%7699, %7700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7702 = tensor.empty() : tensor<1x64xf32>
+    %7703 = "ttir.relu"(%7701, %7702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7704 = tensor.empty() : tensor<1x64xf32>
+    %7705 = "ttir.relu"(%7703, %7704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7706 = tensor.empty() : tensor<1x64xf32>
+    %7707 = "ttir.relu"(%7705, %7706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7708 = tensor.empty() : tensor<1x64xf32>
+    %7709 = "ttir.relu"(%7707, %7708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7710 = tensor.empty() : tensor<1x64xf32>
+    %7711 = "ttir.relu"(%7709, %7710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7712 = tensor.empty() : tensor<1x64xf32>
+    %7713 = "ttir.relu"(%7711, %7712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7714 = tensor.empty() : tensor<1x64xf32>
+    %7715 = "ttir.relu"(%7713, %7714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7716 = tensor.empty() : tensor<1x64xf32>
+    %7717 = "ttir.relu"(%7715, %7716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7718 = tensor.empty() : tensor<1x64xf32>
+    %7719 = "ttir.relu"(%7717, %7718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7720 = tensor.empty() : tensor<1x64xf32>
+    %7721 = "ttir.relu"(%7719, %7720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7722 = tensor.empty() : tensor<1x64xf32>
+    %7723 = "ttir.relu"(%7721, %7722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7724 = tensor.empty() : tensor<1x64xf32>
+    %7725 = "ttir.relu"(%7723, %7724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7726 = tensor.empty() : tensor<1x64xf32>
+    %7727 = "ttir.relu"(%7725, %7726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7728 = tensor.empty() : tensor<1x64xf32>
+    %7729 = "ttir.relu"(%7727, %7728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7730 = tensor.empty() : tensor<1x64xf32>
+    %7731 = "ttir.relu"(%7729, %7730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7732 = tensor.empty() : tensor<1x64xf32>
+    %7733 = "ttir.relu"(%7731, %7732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7734 = tensor.empty() : tensor<1x64xf32>
+    %7735 = "ttir.relu"(%7733, %7734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7736 = tensor.empty() : tensor<1x64xf32>
+    %7737 = "ttir.relu"(%7735, %7736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7738 = tensor.empty() : tensor<1x64xf32>
+    %7739 = "ttir.relu"(%7737, %7738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7740 = tensor.empty() : tensor<1x64xf32>
+    %7741 = "ttir.relu"(%7739, %7740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7742 = tensor.empty() : tensor<1x64xf32>
+    %7743 = "ttir.relu"(%7741, %7742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7744 = tensor.empty() : tensor<1x64xf32>
+    %7745 = "ttir.relu"(%7743, %7744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7746 = tensor.empty() : tensor<1x64xf32>
+    %7747 = "ttir.relu"(%7745, %7746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7748 = tensor.empty() : tensor<1x64xf32>
+    %7749 = "ttir.relu"(%7747, %7748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7750 = tensor.empty() : tensor<1x64xf32>
+    %7751 = "ttir.relu"(%7749, %7750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7752 = tensor.empty() : tensor<1x64xf32>
+    %7753 = "ttir.relu"(%7751, %7752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7754 = tensor.empty() : tensor<1x64xf32>
+    %7755 = "ttir.relu"(%7753, %7754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7756 = tensor.empty() : tensor<1x64xf32>
+    %7757 = "ttir.relu"(%7755, %7756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7758 = tensor.empty() : tensor<1x64xf32>
+    %7759 = "ttir.relu"(%7757, %7758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7760 = tensor.empty() : tensor<1x64xf32>
+    %7761 = "ttir.relu"(%7759, %7760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7762 = tensor.empty() : tensor<1x64xf32>
+    %7763 = "ttir.relu"(%7761, %7762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7764 = tensor.empty() : tensor<1x64xf32>
+    %7765 = "ttir.relu"(%7763, %7764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7766 = tensor.empty() : tensor<1x64xf32>
+    %7767 = "ttir.relu"(%7765, %7766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7768 = tensor.empty() : tensor<1x64xf32>
+    %7769 = "ttir.relu"(%7767, %7768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7770 = tensor.empty() : tensor<1x64xf32>
+    %7771 = "ttir.relu"(%7769, %7770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7772 = tensor.empty() : tensor<1x64xf32>
+    %7773 = "ttir.relu"(%7771, %7772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7774 = tensor.empty() : tensor<1x64xf32>
+    %7775 = "ttir.relu"(%7773, %7774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7776 = tensor.empty() : tensor<1x64xf32>
+    %7777 = "ttir.relu"(%7775, %7776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7778 = tensor.empty() : tensor<1x64xf32>
+    %7779 = "ttir.relu"(%7777, %7778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7780 = tensor.empty() : tensor<1x64xf32>
+    %7781 = "ttir.relu"(%7779, %7780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7782 = tensor.empty() : tensor<1x64xf32>
+    %7783 = "ttir.relu"(%7781, %7782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7784 = tensor.empty() : tensor<1x64xf32>
+    %7785 = "ttir.relu"(%7783, %7784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7786 = tensor.empty() : tensor<1x64xf32>
+    %7787 = "ttir.relu"(%7785, %7786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7788 = tensor.empty() : tensor<1x64xf32>
+    %7789 = "ttir.relu"(%7787, %7788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7790 = tensor.empty() : tensor<1x64xf32>
+    %7791 = "ttir.relu"(%7789, %7790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7792 = tensor.empty() : tensor<1x64xf32>
+    %7793 = "ttir.relu"(%7791, %7792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7794 = tensor.empty() : tensor<1x64xf32>
+    %7795 = "ttir.relu"(%7793, %7794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7796 = tensor.empty() : tensor<1x64xf32>
+    %7797 = "ttir.relu"(%7795, %7796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7798 = tensor.empty() : tensor<1x64xf32>
+    %7799 = "ttir.relu"(%7797, %7798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7800 = tensor.empty() : tensor<1x64xf32>
+    %7801 = "ttir.relu"(%7799, %7800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7802 = tensor.empty() : tensor<1x64xf32>
+    %7803 = "ttir.relu"(%7801, %7802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7804 = tensor.empty() : tensor<1x64xf32>
+    %7805 = "ttir.relu"(%7803, %7804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7806 = tensor.empty() : tensor<1x64xf32>
+    %7807 = "ttir.relu"(%7805, %7806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7808 = tensor.empty() : tensor<1x64xf32>
+    %7809 = "ttir.relu"(%7807, %7808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7810 = tensor.empty() : tensor<1x64xf32>
+    %7811 = "ttir.relu"(%7809, %7810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7812 = tensor.empty() : tensor<1x64xf32>
+    %7813 = "ttir.relu"(%7811, %7812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7814 = tensor.empty() : tensor<1x64xf32>
+    %7815 = "ttir.relu"(%7813, %7814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7816 = tensor.empty() : tensor<1x64xf32>
+    %7817 = "ttir.relu"(%7815, %7816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7818 = tensor.empty() : tensor<1x64xf32>
+    %7819 = "ttir.relu"(%7817, %7818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7820 = tensor.empty() : tensor<1x64xf32>
+    %7821 = "ttir.relu"(%7819, %7820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7822 = tensor.empty() : tensor<1x64xf32>
+    %7823 = "ttir.relu"(%7821, %7822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7824 = tensor.empty() : tensor<1x64xf32>
+    %7825 = "ttir.relu"(%7823, %7824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7826 = tensor.empty() : tensor<1x64xf32>
+    %7827 = "ttir.relu"(%7825, %7826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7828 = tensor.empty() : tensor<1x64xf32>
+    %7829 = "ttir.relu"(%7827, %7828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7830 = tensor.empty() : tensor<1x64xf32>
+    %7831 = "ttir.relu"(%7829, %7830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7832 = tensor.empty() : tensor<1x64xf32>
+    %7833 = "ttir.relu"(%7831, %7832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7834 = tensor.empty() : tensor<1x64xf32>
+    %7835 = "ttir.relu"(%7833, %7834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7836 = tensor.empty() : tensor<1x64xf32>
+    %7837 = "ttir.relu"(%7835, %7836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7838 = tensor.empty() : tensor<1x64xf32>
+    %7839 = "ttir.relu"(%7837, %7838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7840 = tensor.empty() : tensor<1x64xf32>
+    %7841 = "ttir.relu"(%7839, %7840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7842 = tensor.empty() : tensor<1x64xf32>
+    %7843 = "ttir.relu"(%7841, %7842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7844 = tensor.empty() : tensor<1x64xf32>
+    %7845 = "ttir.relu"(%7843, %7844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7846 = tensor.empty() : tensor<1x64xf32>
+    %7847 = "ttir.relu"(%7845, %7846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7848 = tensor.empty() : tensor<1x64xf32>
+    %7849 = "ttir.relu"(%7847, %7848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7850 = tensor.empty() : tensor<1x64xf32>
+    %7851 = "ttir.relu"(%7849, %7850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7852 = tensor.empty() : tensor<1x64xf32>
+    %7853 = "ttir.relu"(%7851, %7852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7854 = tensor.empty() : tensor<1x64xf32>
+    %7855 = "ttir.relu"(%7853, %7854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7856 = tensor.empty() : tensor<1x64xf32>
+    %7857 = "ttir.relu"(%7855, %7856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7858 = tensor.empty() : tensor<1x64xf32>
+    %7859 = "ttir.relu"(%7857, %7858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7860 = tensor.empty() : tensor<1x64xf32>
+    %7861 = "ttir.relu"(%7859, %7860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7862 = tensor.empty() : tensor<1x64xf32>
+    %7863 = "ttir.relu"(%7861, %7862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7864 = tensor.empty() : tensor<1x64xf32>
+    %7865 = "ttir.relu"(%7863, %7864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7866 = tensor.empty() : tensor<1x64xf32>
+    %7867 = "ttir.relu"(%7865, %7866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7868 = tensor.empty() : tensor<1x64xf32>
+    %7869 = "ttir.relu"(%7867, %7868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7870 = tensor.empty() : tensor<1x64xf32>
+    %7871 = "ttir.relu"(%7869, %7870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7872 = tensor.empty() : tensor<1x64xf32>
+    %7873 = "ttir.relu"(%7871, %7872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7874 = tensor.empty() : tensor<1x64xf32>
+    %7875 = "ttir.relu"(%7873, %7874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7876 = tensor.empty() : tensor<1x64xf32>
+    %7877 = "ttir.relu"(%7875, %7876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7878 = tensor.empty() : tensor<1x64xf32>
+    %7879 = "ttir.relu"(%7877, %7878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7880 = tensor.empty() : tensor<1x64xf32>
+    %7881 = "ttir.relu"(%7879, %7880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7882 = tensor.empty() : tensor<1x64xf32>
+    %7883 = "ttir.relu"(%7881, %7882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7884 = tensor.empty() : tensor<1x64xf32>
+    %7885 = "ttir.relu"(%7883, %7884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7886 = tensor.empty() : tensor<1x64xf32>
+    %7887 = "ttir.relu"(%7885, %7886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7888 = tensor.empty() : tensor<1x64xf32>
+    %7889 = "ttir.relu"(%7887, %7888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7890 = tensor.empty() : tensor<1x64xf32>
+    %7891 = "ttir.relu"(%7889, %7890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7892 = tensor.empty() : tensor<1x64xf32>
+    %7893 = "ttir.relu"(%7891, %7892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7894 = tensor.empty() : tensor<1x64xf32>
+    %7895 = "ttir.relu"(%7893, %7894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7896 = tensor.empty() : tensor<1x64xf32>
+    %7897 = "ttir.relu"(%7895, %7896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7898 = tensor.empty() : tensor<1x64xf32>
+    %7899 = "ttir.relu"(%7897, %7898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7900 = tensor.empty() : tensor<1x64xf32>
+    %7901 = "ttir.relu"(%7899, %7900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7902 = tensor.empty() : tensor<1x64xf32>
+    %7903 = "ttir.relu"(%7901, %7902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7904 = tensor.empty() : tensor<1x64xf32>
+    %7905 = "ttir.relu"(%7903, %7904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7906 = tensor.empty() : tensor<1x64xf32>
+    %7907 = "ttir.relu"(%7905, %7906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7908 = tensor.empty() : tensor<1x64xf32>
+    %7909 = "ttir.relu"(%7907, %7908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7910 = tensor.empty() : tensor<1x64xf32>
+    %7911 = "ttir.relu"(%7909, %7910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7912 = tensor.empty() : tensor<1x64xf32>
+    %7913 = "ttir.relu"(%7911, %7912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7914 = tensor.empty() : tensor<1x64xf32>
+    %7915 = "ttir.relu"(%7913, %7914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7916 = tensor.empty() : tensor<1x64xf32>
+    %7917 = "ttir.relu"(%7915, %7916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7918 = tensor.empty() : tensor<1x64xf32>
+    %7919 = "ttir.relu"(%7917, %7918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7920 = tensor.empty() : tensor<1x64xf32>
+    %7921 = "ttir.relu"(%7919, %7920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7922 = tensor.empty() : tensor<1x64xf32>
+    %7923 = "ttir.relu"(%7921, %7922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7924 = tensor.empty() : tensor<1x64xf32>
+    %7925 = "ttir.relu"(%7923, %7924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7926 = tensor.empty() : tensor<1x64xf32>
+    %7927 = "ttir.relu"(%7925, %7926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7928 = tensor.empty() : tensor<1x64xf32>
+    %7929 = "ttir.relu"(%7927, %7928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7930 = tensor.empty() : tensor<1x64xf32>
+    %7931 = "ttir.relu"(%7929, %7930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7932 = tensor.empty() : tensor<1x64xf32>
+    %7933 = "ttir.relu"(%7931, %7932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7934 = tensor.empty() : tensor<1x64xf32>
+    %7935 = "ttir.relu"(%7933, %7934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7936 = tensor.empty() : tensor<1x64xf32>
+    %7937 = "ttir.relu"(%7935, %7936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7938 = tensor.empty() : tensor<1x64xf32>
+    %7939 = "ttir.relu"(%7937, %7938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7940 = tensor.empty() : tensor<1x64xf32>
+    %7941 = "ttir.relu"(%7939, %7940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7942 = tensor.empty() : tensor<1x64xf32>
+    %7943 = "ttir.relu"(%7941, %7942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7944 = tensor.empty() : tensor<1x64xf32>
+    %7945 = "ttir.relu"(%7943, %7944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7946 = tensor.empty() : tensor<1x64xf32>
+    %7947 = "ttir.relu"(%7945, %7946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7948 = tensor.empty() : tensor<1x64xf32>
+    %7949 = "ttir.relu"(%7947, %7948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7950 = tensor.empty() : tensor<1x64xf32>
+    %7951 = "ttir.relu"(%7949, %7950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7952 = tensor.empty() : tensor<1x64xf32>
+    %7953 = "ttir.relu"(%7951, %7952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7954 = tensor.empty() : tensor<1x64xf32>
+    %7955 = "ttir.relu"(%7953, %7954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7956 = tensor.empty() : tensor<1x64xf32>
+    %7957 = "ttir.relu"(%7955, %7956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7958 = tensor.empty() : tensor<1x64xf32>
+    %7959 = "ttir.relu"(%7957, %7958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7960 = tensor.empty() : tensor<1x64xf32>
+    %7961 = "ttir.relu"(%7959, %7960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7962 = tensor.empty() : tensor<1x64xf32>
+    %7963 = "ttir.relu"(%7961, %7962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7964 = tensor.empty() : tensor<1x64xf32>
+    %7965 = "ttir.relu"(%7963, %7964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7966 = tensor.empty() : tensor<1x64xf32>
+    %7967 = "ttir.relu"(%7965, %7966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7968 = tensor.empty() : tensor<1x64xf32>
+    %7969 = "ttir.relu"(%7967, %7968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7970 = tensor.empty() : tensor<1x64xf32>
+    %7971 = "ttir.relu"(%7969, %7970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7972 = tensor.empty() : tensor<1x64xf32>
+    %7973 = "ttir.relu"(%7971, %7972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7974 = tensor.empty() : tensor<1x64xf32>
+    %7975 = "ttir.relu"(%7973, %7974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7976 = tensor.empty() : tensor<1x64xf32>
+    %7977 = "ttir.relu"(%7975, %7976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7978 = tensor.empty() : tensor<1x64xf32>
+    %7979 = "ttir.relu"(%7977, %7978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7980 = tensor.empty() : tensor<1x64xf32>
+    %7981 = "ttir.relu"(%7979, %7980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7982 = tensor.empty() : tensor<1x64xf32>
+    %7983 = "ttir.relu"(%7981, %7982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7984 = tensor.empty() : tensor<1x64xf32>
+    %7985 = "ttir.relu"(%7983, %7984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7986 = tensor.empty() : tensor<1x64xf32>
+    %7987 = "ttir.relu"(%7985, %7986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7988 = tensor.empty() : tensor<1x64xf32>
+    %7989 = "ttir.relu"(%7987, %7988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7990 = tensor.empty() : tensor<1x64xf32>
+    %7991 = "ttir.relu"(%7989, %7990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7992 = tensor.empty() : tensor<1x64xf32>
+    %7993 = "ttir.relu"(%7991, %7992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7994 = tensor.empty() : tensor<1x64xf32>
+    %7995 = "ttir.relu"(%7993, %7994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7996 = tensor.empty() : tensor<1x64xf32>
+    %7997 = "ttir.relu"(%7995, %7996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7998 = tensor.empty() : tensor<1x64xf32>
+    %7999 = "ttir.relu"(%7997, %7998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8000 = tensor.empty() : tensor<1x64xf32>
+    %8001 = "ttir.relu"(%7999, %8000) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8002 = tensor.empty() : tensor<1x64xf32>
+    %8003 = "ttir.relu"(%8001, %8002) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8004 = tensor.empty() : tensor<1x64xf32>
+    %8005 = "ttir.relu"(%8003, %8004) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8006 = tensor.empty() : tensor<1x64xf32>
+    %8007 = "ttir.relu"(%8005, %8006) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8008 = tensor.empty() : tensor<1x64xf32>
+    %8009 = "ttir.relu"(%8007, %8008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8010 = tensor.empty() : tensor<1x64xf32>
+    %8011 = "ttir.relu"(%8009, %8010) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8012 = tensor.empty() : tensor<1x64xf32>
+    %8013 = "ttir.relu"(%8011, %8012) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8014 = tensor.empty() : tensor<1x64xf32>
+    %8015 = "ttir.relu"(%8013, %8014) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8016 = tensor.empty() : tensor<1x64xf32>
+    %8017 = "ttir.relu"(%8015, %8016) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8018 = tensor.empty() : tensor<1x64xf32>
+    %8019 = "ttir.relu"(%8017, %8018) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8020 = tensor.empty() : tensor<1x64xf32>
+    %8021 = "ttir.relu"(%8019, %8020) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8022 = tensor.empty() : tensor<1x64xf32>
+    %8023 = "ttir.relu"(%8021, %8022) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8024 = tensor.empty() : tensor<1x64xf32>
+    %8025 = "ttir.relu"(%8023, %8024) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8026 = tensor.empty() : tensor<1x64xf32>
+    %8027 = "ttir.relu"(%8025, %8026) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8028 = tensor.empty() : tensor<1x64xf32>
+    %8029 = "ttir.relu"(%8027, %8028) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8030 = tensor.empty() : tensor<1x64xf32>
+    %8031 = "ttir.relu"(%8029, %8030) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8032 = tensor.empty() : tensor<1x64xf32>
+    %8033 = "ttir.relu"(%8031, %8032) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8034 = tensor.empty() : tensor<1x64xf32>
+    %8035 = "ttir.relu"(%8033, %8034) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8036 = tensor.empty() : tensor<1x64xf32>
+    %8037 = "ttir.relu"(%8035, %8036) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8038 = tensor.empty() : tensor<1x64xf32>
+    %8039 = "ttir.relu"(%8037, %8038) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8040 = tensor.empty() : tensor<1x64xf32>
+    %8041 = "ttir.relu"(%8039, %8040) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8042 = tensor.empty() : tensor<1x64xf32>
+    %8043 = "ttir.relu"(%8041, %8042) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8044 = tensor.empty() : tensor<1x64xf32>
+    %8045 = "ttir.relu"(%8043, %8044) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8046 = tensor.empty() : tensor<1x64xf32>
+    %8047 = "ttir.relu"(%8045, %8046) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8048 = tensor.empty() : tensor<1x64xf32>
+    %8049 = "ttir.relu"(%8047, %8048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8050 = tensor.empty() : tensor<1x64xf32>
+    %8051 = "ttir.relu"(%8049, %8050) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8052 = tensor.empty() : tensor<1x64xf32>
+    %8053 = "ttir.relu"(%8051, %8052) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8054 = tensor.empty() : tensor<1x64xf32>
+    %8055 = "ttir.relu"(%8053, %8054) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8056 = tensor.empty() : tensor<1x64xf32>
+    %8057 = "ttir.relu"(%8055, %8056) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8058 = tensor.empty() : tensor<1x64xf32>
+    %8059 = "ttir.relu"(%8057, %8058) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8060 = tensor.empty() : tensor<1x64xf32>
+    %8061 = "ttir.relu"(%8059, %8060) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8062 = tensor.empty() : tensor<1x64xf32>
+    %8063 = "ttir.relu"(%8061, %8062) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8064 = tensor.empty() : tensor<1x64xf32>
+    %8065 = "ttir.relu"(%8063, %8064) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8066 = tensor.empty() : tensor<1x64xf32>
+    %8067 = "ttir.relu"(%8065, %8066) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8068 = tensor.empty() : tensor<1x64xf32>
+    %8069 = "ttir.relu"(%8067, %8068) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8070 = tensor.empty() : tensor<1x64xf32>
+    %8071 = "ttir.relu"(%8069, %8070) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8072 = tensor.empty() : tensor<1x64xf32>
+    %8073 = "ttir.relu"(%8071, %8072) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8074 = tensor.empty() : tensor<1x64xf32>
+    %8075 = "ttir.relu"(%8073, %8074) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8076 = tensor.empty() : tensor<1x64xf32>
+    %8077 = "ttir.relu"(%8075, %8076) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8078 = tensor.empty() : tensor<1x64xf32>
+    %8079 = "ttir.relu"(%8077, %8078) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8080 = tensor.empty() : tensor<1x64xf32>
+    %8081 = "ttir.relu"(%8079, %8080) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8082 = tensor.empty() : tensor<1x64xf32>
+    %8083 = "ttir.relu"(%8081, %8082) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8084 = tensor.empty() : tensor<1x64xf32>
+    %8085 = "ttir.relu"(%8083, %8084) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8086 = tensor.empty() : tensor<1x64xf32>
+    %8087 = "ttir.relu"(%8085, %8086) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8088 = tensor.empty() : tensor<1x64xf32>
+    %8089 = "ttir.relu"(%8087, %8088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8090 = tensor.empty() : tensor<1x64xf32>
+    %8091 = "ttir.relu"(%8089, %8090) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8092 = tensor.empty() : tensor<1x64xf32>
+    %8093 = "ttir.relu"(%8091, %8092) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8094 = tensor.empty() : tensor<1x64xf32>
+    %8095 = "ttir.relu"(%8093, %8094) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8096 = tensor.empty() : tensor<1x64xf32>
+    %8097 = "ttir.relu"(%8095, %8096) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8098 = tensor.empty() : tensor<1x64xf32>
+    %8099 = "ttir.relu"(%8097, %8098) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8100 = tensor.empty() : tensor<1x64xf32>
+    %8101 = "ttir.relu"(%8099, %8100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8102 = tensor.empty() : tensor<1x64xf32>
+    %8103 = "ttir.relu"(%8101, %8102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8104 = tensor.empty() : tensor<1x64xf32>
+    %8105 = "ttir.relu"(%8103, %8104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8106 = tensor.empty() : tensor<1x64xf32>
+    %8107 = "ttir.relu"(%8105, %8106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8108 = tensor.empty() : tensor<1x64xf32>
+    %8109 = "ttir.relu"(%8107, %8108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8110 = tensor.empty() : tensor<1x64xf32>
+    %8111 = "ttir.relu"(%8109, %8110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8112 = tensor.empty() : tensor<1x64xf32>
+    %8113 = "ttir.relu"(%8111, %8112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8114 = tensor.empty() : tensor<1x64xf32>
+    %8115 = "ttir.relu"(%8113, %8114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8116 = tensor.empty() : tensor<1x64xf32>
+    %8117 = "ttir.relu"(%8115, %8116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8118 = tensor.empty() : tensor<1x64xf32>
+    %8119 = "ttir.relu"(%8117, %8118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8120 = tensor.empty() : tensor<1x64xf32>
+    %8121 = "ttir.relu"(%8119, %8120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8122 = tensor.empty() : tensor<1x64xf32>
+    %8123 = "ttir.relu"(%8121, %8122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8124 = tensor.empty() : tensor<1x64xf32>
+    %8125 = "ttir.relu"(%8123, %8124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8126 = tensor.empty() : tensor<1x64xf32>
+    %8127 = "ttir.relu"(%8125, %8126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8128 = tensor.empty() : tensor<1x64xf32>
+    %8129 = "ttir.relu"(%8127, %8128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8130 = tensor.empty() : tensor<1x64xf32>
+    %8131 = "ttir.relu"(%8129, %8130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8132 = tensor.empty() : tensor<1x64xf32>
+    %8133 = "ttir.relu"(%8131, %8132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8134 = tensor.empty() : tensor<1x64xf32>
+    %8135 = "ttir.relu"(%8133, %8134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8136 = tensor.empty() : tensor<1x64xf32>
+    %8137 = "ttir.relu"(%8135, %8136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8138 = tensor.empty() : tensor<1x64xf32>
+    %8139 = "ttir.relu"(%8137, %8138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8140 = tensor.empty() : tensor<1x64xf32>
+    %8141 = "ttir.relu"(%8139, %8140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8142 = tensor.empty() : tensor<1x64xf32>
+    %8143 = "ttir.relu"(%8141, %8142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8144 = tensor.empty() : tensor<1x64xf32>
+    %8145 = "ttir.relu"(%8143, %8144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8146 = tensor.empty() : tensor<1x64xf32>
+    %8147 = "ttir.relu"(%8145, %8146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8148 = tensor.empty() : tensor<1x64xf32>
+    %8149 = "ttir.relu"(%8147, %8148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8150 = tensor.empty() : tensor<1x64xf32>
+    %8151 = "ttir.relu"(%8149, %8150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8152 = tensor.empty() : tensor<1x64xf32>
+    %8153 = "ttir.relu"(%8151, %8152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8154 = tensor.empty() : tensor<1x64xf32>
+    %8155 = "ttir.relu"(%8153, %8154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8156 = tensor.empty() : tensor<1x64xf32>
+    %8157 = "ttir.relu"(%8155, %8156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8158 = tensor.empty() : tensor<1x64xf32>
+    %8159 = "ttir.relu"(%8157, %8158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8160 = tensor.empty() : tensor<1x64xf32>
+    %8161 = "ttir.relu"(%8159, %8160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8162 = tensor.empty() : tensor<1x64xf32>
+    %8163 = "ttir.relu"(%8161, %8162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8164 = tensor.empty() : tensor<1x64xf32>
+    %8165 = "ttir.relu"(%8163, %8164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8166 = tensor.empty() : tensor<1x64xf32>
+    %8167 = "ttir.relu"(%8165, %8166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8168 = tensor.empty() : tensor<1x64xf32>
+    %8169 = "ttir.relu"(%8167, %8168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8170 = tensor.empty() : tensor<1x64xf32>
+    %8171 = "ttir.relu"(%8169, %8170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8172 = tensor.empty() : tensor<1x64xf32>
+    %8173 = "ttir.relu"(%8171, %8172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8174 = tensor.empty() : tensor<1x64xf32>
+    %8175 = "ttir.relu"(%8173, %8174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8176 = tensor.empty() : tensor<1x64xf32>
+    %8177 = "ttir.relu"(%8175, %8176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8178 = tensor.empty() : tensor<1x64xf32>
+    %8179 = "ttir.relu"(%8177, %8178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8180 = tensor.empty() : tensor<1x64xf32>
+    %8181 = "ttir.relu"(%8179, %8180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8182 = tensor.empty() : tensor<1x64xf32>
+    %8183 = "ttir.relu"(%8181, %8182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8184 = tensor.empty() : tensor<1x64xf32>
+    %8185 = "ttir.relu"(%8183, %8184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8186 = tensor.empty() : tensor<1x64xf32>
+    %8187 = "ttir.relu"(%8185, %8186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8188 = tensor.empty() : tensor<1x64xf32>
+    %8189 = "ttir.relu"(%8187, %8188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8190 = tensor.empty() : tensor<1x64xf32>
+    %8191 = "ttir.relu"(%8189, %8190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8192 = tensor.empty() : tensor<1x64xf32>
+    %8193 = "ttir.relu"(%8191, %8192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8194 = tensor.empty() : tensor<1x64xf32>
+    %8195 = "ttir.relu"(%8193, %8194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8196 = tensor.empty() : tensor<1x64xf32>
+    %8197 = "ttir.relu"(%8195, %8196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8198 = tensor.empty() : tensor<1x64xf32>
+    %8199 = "ttir.relu"(%8197, %8198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8200 = tensor.empty() : tensor<1x64xf32>
+    %8201 = "ttir.relu"(%8199, %8200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8202 = tensor.empty() : tensor<1x64xf32>
+    %8203 = "ttir.relu"(%8201, %8202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8204 = tensor.empty() : tensor<1x64xf32>
+    %8205 = "ttir.relu"(%8203, %8204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8206 = tensor.empty() : tensor<1x64xf32>
+    %8207 = "ttir.relu"(%8205, %8206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8208 = tensor.empty() : tensor<1x64xf32>
+    %8209 = "ttir.relu"(%8207, %8208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8210 = tensor.empty() : tensor<1x64xf32>
+    %8211 = "ttir.relu"(%8209, %8210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8212 = tensor.empty() : tensor<1x64xf32>
+    %8213 = "ttir.relu"(%8211, %8212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8214 = tensor.empty() : tensor<1x64xf32>
+    %8215 = "ttir.relu"(%8213, %8214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8216 = tensor.empty() : tensor<1x64xf32>
+    %8217 = "ttir.relu"(%8215, %8216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8218 = tensor.empty() : tensor<1x64xf32>
+    %8219 = "ttir.relu"(%8217, %8218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8220 = tensor.empty() : tensor<1x64xf32>
+    %8221 = "ttir.relu"(%8219, %8220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8222 = tensor.empty() : tensor<1x64xf32>
+    %8223 = "ttir.relu"(%8221, %8222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8224 = tensor.empty() : tensor<1x64xf32>
+    %8225 = "ttir.relu"(%8223, %8224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8226 = tensor.empty() : tensor<1x64xf32>
+    %8227 = "ttir.relu"(%8225, %8226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8228 = tensor.empty() : tensor<1x64xf32>
+    %8229 = "ttir.relu"(%8227, %8228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8230 = tensor.empty() : tensor<1x64xf32>
+    %8231 = "ttir.relu"(%8229, %8230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8232 = tensor.empty() : tensor<1x64xf32>
+    %8233 = "ttir.relu"(%8231, %8232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8234 = tensor.empty() : tensor<1x64xf32>
+    %8235 = "ttir.relu"(%8233, %8234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8236 = tensor.empty() : tensor<1x64xf32>
+    %8237 = "ttir.relu"(%8235, %8236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8238 = tensor.empty() : tensor<1x64xf32>
+    %8239 = "ttir.relu"(%8237, %8238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8240 = tensor.empty() : tensor<1x64xf32>
+    %8241 = "ttir.relu"(%8239, %8240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8242 = tensor.empty() : tensor<1x64xf32>
+    %8243 = "ttir.relu"(%8241, %8242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8244 = tensor.empty() : tensor<1x64xf32>
+    %8245 = "ttir.relu"(%8243, %8244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8246 = tensor.empty() : tensor<1x64xf32>
+    %8247 = "ttir.relu"(%8245, %8246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8248 = tensor.empty() : tensor<1x64xf32>
+    %8249 = "ttir.relu"(%8247, %8248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8250 = tensor.empty() : tensor<1x64xf32>
+    %8251 = "ttir.relu"(%8249, %8250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8252 = tensor.empty() : tensor<1x64xf32>
+    %8253 = "ttir.relu"(%8251, %8252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8254 = tensor.empty() : tensor<1x64xf32>
+    %8255 = "ttir.relu"(%8253, %8254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8256 = tensor.empty() : tensor<1x64xf32>
+    %8257 = "ttir.relu"(%8255, %8256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8258 = tensor.empty() : tensor<1x64xf32>
+    %8259 = "ttir.relu"(%8257, %8258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8260 = tensor.empty() : tensor<1x64xf32>
+    %8261 = "ttir.relu"(%8259, %8260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8262 = tensor.empty() : tensor<1x64xf32>
+    %8263 = "ttir.relu"(%8261, %8262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8264 = tensor.empty() : tensor<1x64xf32>
+    %8265 = "ttir.relu"(%8263, %8264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8266 = tensor.empty() : tensor<1x64xf32>
+    %8267 = "ttir.relu"(%8265, %8266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8268 = tensor.empty() : tensor<1x64xf32>
+    %8269 = "ttir.relu"(%8267, %8268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8270 = tensor.empty() : tensor<1x64xf32>
+    %8271 = "ttir.relu"(%8269, %8270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8272 = tensor.empty() : tensor<1x64xf32>
+    %8273 = "ttir.relu"(%8271, %8272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8274 = tensor.empty() : tensor<1x64xf32>
+    %8275 = "ttir.relu"(%8273, %8274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8276 = tensor.empty() : tensor<1x64xf32>
+    %8277 = "ttir.relu"(%8275, %8276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8278 = tensor.empty() : tensor<1x64xf32>
+    %8279 = "ttir.relu"(%8277, %8278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8280 = tensor.empty() : tensor<1x64xf32>
+    %8281 = "ttir.relu"(%8279, %8280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8282 = tensor.empty() : tensor<1x64xf32>
+    %8283 = "ttir.relu"(%8281, %8282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8284 = tensor.empty() : tensor<1x64xf32>
+    %8285 = "ttir.relu"(%8283, %8284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8286 = tensor.empty() : tensor<1x64xf32>
+    %8287 = "ttir.relu"(%8285, %8286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8288 = tensor.empty() : tensor<1x64xf32>
+    %8289 = "ttir.relu"(%8287, %8288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8290 = tensor.empty() : tensor<1x64xf32>
+    %8291 = "ttir.relu"(%8289, %8290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8292 = tensor.empty() : tensor<1x64xf32>
+    %8293 = "ttir.relu"(%8291, %8292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8294 = tensor.empty() : tensor<1x64xf32>
+    %8295 = "ttir.relu"(%8293, %8294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8296 = tensor.empty() : tensor<1x64xf32>
+    %8297 = "ttir.relu"(%8295, %8296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8298 = tensor.empty() : tensor<1x64xf32>
+    %8299 = "ttir.relu"(%8297, %8298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8300 = tensor.empty() : tensor<1x64xf32>
+    %8301 = "ttir.relu"(%8299, %8300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8302 = tensor.empty() : tensor<1x64xf32>
+    %8303 = "ttir.relu"(%8301, %8302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8304 = tensor.empty() : tensor<1x64xf32>
+    %8305 = "ttir.relu"(%8303, %8304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8306 = tensor.empty() : tensor<1x64xf32>
+    %8307 = "ttir.relu"(%8305, %8306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8308 = tensor.empty() : tensor<1x64xf32>
+    %8309 = "ttir.relu"(%8307, %8308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8310 = tensor.empty() : tensor<1x64xf32>
+    %8311 = "ttir.relu"(%8309, %8310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8312 = tensor.empty() : tensor<1x64xf32>
+    %8313 = "ttir.relu"(%8311, %8312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8314 = tensor.empty() : tensor<1x64xf32>
+    %8315 = "ttir.relu"(%8313, %8314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8316 = tensor.empty() : tensor<1x64xf32>
+    %8317 = "ttir.relu"(%8315, %8316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8318 = tensor.empty() : tensor<1x64xf32>
+    %8319 = "ttir.relu"(%8317, %8318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8320 = tensor.empty() : tensor<1x64xf32>
+    %8321 = "ttir.relu"(%8319, %8320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8322 = tensor.empty() : tensor<1x64xf32>
+    %8323 = "ttir.relu"(%8321, %8322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8324 = tensor.empty() : tensor<1x64xf32>
+    %8325 = "ttir.relu"(%8323, %8324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8326 = tensor.empty() : tensor<1x64xf32>
+    %8327 = "ttir.relu"(%8325, %8326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8328 = tensor.empty() : tensor<1x64xf32>
+    %8329 = "ttir.relu"(%8327, %8328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8330 = tensor.empty() : tensor<1x64xf32>
+    %8331 = "ttir.relu"(%8329, %8330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8332 = tensor.empty() : tensor<1x64xf32>
+    %8333 = "ttir.relu"(%8331, %8332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8334 = tensor.empty() : tensor<1x64xf32>
+    %8335 = "ttir.relu"(%8333, %8334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8336 = tensor.empty() : tensor<1x64xf32>
+    %8337 = "ttir.relu"(%8335, %8336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8338 = tensor.empty() : tensor<1x64xf32>
+    %8339 = "ttir.relu"(%8337, %8338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8340 = tensor.empty() : tensor<1x64xf32>
+    %8341 = "ttir.relu"(%8339, %8340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8342 = tensor.empty() : tensor<1x64xf32>
+    %8343 = "ttir.relu"(%8341, %8342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8344 = tensor.empty() : tensor<1x64xf32>
+    %8345 = "ttir.relu"(%8343, %8344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8346 = tensor.empty() : tensor<1x64xf32>
+    %8347 = "ttir.relu"(%8345, %8346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8348 = tensor.empty() : tensor<1x64xf32>
+    %8349 = "ttir.relu"(%8347, %8348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8350 = tensor.empty() : tensor<1x64xf32>
+    %8351 = "ttir.relu"(%8349, %8350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8352 = tensor.empty() : tensor<1x64xf32>
+    %8353 = "ttir.relu"(%8351, %8352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8354 = tensor.empty() : tensor<1x64xf32>
+    %8355 = "ttir.relu"(%8353, %8354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8356 = tensor.empty() : tensor<1x64xf32>
+    %8357 = "ttir.relu"(%8355, %8356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8358 = tensor.empty() : tensor<1x64xf32>
+    %8359 = "ttir.relu"(%8357, %8358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8360 = tensor.empty() : tensor<1x64xf32>
+    %8361 = "ttir.relu"(%8359, %8360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8362 = tensor.empty() : tensor<1x64xf32>
+    %8363 = "ttir.relu"(%8361, %8362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8364 = tensor.empty() : tensor<1x64xf32>
+    %8365 = "ttir.relu"(%8363, %8364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8366 = tensor.empty() : tensor<1x64xf32>
+    %8367 = "ttir.relu"(%8365, %8366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8368 = tensor.empty() : tensor<1x64xf32>
+    %8369 = "ttir.relu"(%8367, %8368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8370 = tensor.empty() : tensor<1x64xf32>
+    %8371 = "ttir.relu"(%8369, %8370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8372 = tensor.empty() : tensor<1x64xf32>
+    %8373 = "ttir.relu"(%8371, %8372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8374 = tensor.empty() : tensor<1x64xf32>
+    %8375 = "ttir.relu"(%8373, %8374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8376 = tensor.empty() : tensor<1x64xf32>
+    %8377 = "ttir.relu"(%8375, %8376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8378 = tensor.empty() : tensor<1x64xf32>
+    %8379 = "ttir.relu"(%8377, %8378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8380 = tensor.empty() : tensor<1x64xf32>
+    %8381 = "ttir.relu"(%8379, %8380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8382 = tensor.empty() : tensor<1x64xf32>
+    %8383 = "ttir.relu"(%8381, %8382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8384 = tensor.empty() : tensor<1x64xf32>
+    %8385 = "ttir.relu"(%8383, %8384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8386 = tensor.empty() : tensor<1x64xf32>
+    %8387 = "ttir.relu"(%8385, %8386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8388 = tensor.empty() : tensor<1x64xf32>
+    %8389 = "ttir.relu"(%8387, %8388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8390 = tensor.empty() : tensor<1x64xf32>
+    %8391 = "ttir.relu"(%8389, %8390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8392 = tensor.empty() : tensor<1x64xf32>
+    %8393 = "ttir.relu"(%8391, %8392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8394 = tensor.empty() : tensor<1x64xf32>
+    %8395 = "ttir.relu"(%8393, %8394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8396 = tensor.empty() : tensor<1x64xf32>
+    %8397 = "ttir.relu"(%8395, %8396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8398 = tensor.empty() : tensor<1x64xf32>
+    %8399 = "ttir.relu"(%8397, %8398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8400 = tensor.empty() : tensor<1x64xf32>
+    %8401 = "ttir.relu"(%8399, %8400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8402 = tensor.empty() : tensor<1x64xf32>
+    %8403 = "ttir.relu"(%8401, %8402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8404 = tensor.empty() : tensor<1x64xf32>
+    %8405 = "ttir.relu"(%8403, %8404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8406 = tensor.empty() : tensor<1x64xf32>
+    %8407 = "ttir.relu"(%8405, %8406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8408 = tensor.empty() : tensor<1x64xf32>
+    %8409 = "ttir.relu"(%8407, %8408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8410 = tensor.empty() : tensor<1x64xf32>
+    %8411 = "ttir.relu"(%8409, %8410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8412 = tensor.empty() : tensor<1x64xf32>
+    %8413 = "ttir.relu"(%8411, %8412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8414 = tensor.empty() : tensor<1x64xf32>
+    %8415 = "ttir.relu"(%8413, %8414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8416 = tensor.empty() : tensor<1x64xf32>
+    %8417 = "ttir.relu"(%8415, %8416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8418 = tensor.empty() : tensor<1x64xf32>
+    %8419 = "ttir.relu"(%8417, %8418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8420 = tensor.empty() : tensor<1x64xf32>
+    %8421 = "ttir.relu"(%8419, %8420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8422 = tensor.empty() : tensor<1x64xf32>
+    %8423 = "ttir.relu"(%8421, %8422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8424 = tensor.empty() : tensor<1x64xf32>
+    %8425 = "ttir.relu"(%8423, %8424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8426 = tensor.empty() : tensor<1x64xf32>
+    %8427 = "ttir.relu"(%8425, %8426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8428 = tensor.empty() : tensor<1x64xf32>
+    %8429 = "ttir.relu"(%8427, %8428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8430 = tensor.empty() : tensor<1x64xf32>
+    %8431 = "ttir.relu"(%8429, %8430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8432 = tensor.empty() : tensor<1x64xf32>
+    %8433 = "ttir.relu"(%8431, %8432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8434 = tensor.empty() : tensor<1x64xf32>
+    %8435 = "ttir.relu"(%8433, %8434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8436 = tensor.empty() : tensor<1x64xf32>
+    %8437 = "ttir.relu"(%8435, %8436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8438 = tensor.empty() : tensor<1x64xf32>
+    %8439 = "ttir.relu"(%8437, %8438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8440 = tensor.empty() : tensor<1x64xf32>
+    %8441 = "ttir.relu"(%8439, %8440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8442 = tensor.empty() : tensor<1x64xf32>
+    %8443 = "ttir.relu"(%8441, %8442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8444 = tensor.empty() : tensor<1x64xf32>
+    %8445 = "ttir.relu"(%8443, %8444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8446 = tensor.empty() : tensor<1x64xf32>
+    %8447 = "ttir.relu"(%8445, %8446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8448 = tensor.empty() : tensor<1x64xf32>
+    %8449 = "ttir.relu"(%8447, %8448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8450 = tensor.empty() : tensor<1x64xf32>
+    %8451 = "ttir.relu"(%8449, %8450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8452 = tensor.empty() : tensor<1x64xf32>
+    %8453 = "ttir.relu"(%8451, %8452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8454 = tensor.empty() : tensor<1x64xf32>
+    %8455 = "ttir.relu"(%8453, %8454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8456 = tensor.empty() : tensor<1x64xf32>
+    %8457 = "ttir.relu"(%8455, %8456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8458 = tensor.empty() : tensor<1x64xf32>
+    %8459 = "ttir.relu"(%8457, %8458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8460 = tensor.empty() : tensor<1x64xf32>
+    %8461 = "ttir.relu"(%8459, %8460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8462 = tensor.empty() : tensor<1x64xf32>
+    %8463 = "ttir.relu"(%8461, %8462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8464 = tensor.empty() : tensor<1x64xf32>
+    %8465 = "ttir.relu"(%8463, %8464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8466 = tensor.empty() : tensor<1x64xf32>
+    %8467 = "ttir.relu"(%8465, %8466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8468 = tensor.empty() : tensor<1x64xf32>
+    %8469 = "ttir.relu"(%8467, %8468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8470 = tensor.empty() : tensor<1x64xf32>
+    %8471 = "ttir.relu"(%8469, %8470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8472 = tensor.empty() : tensor<1x64xf32>
+    %8473 = "ttir.relu"(%8471, %8472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8474 = tensor.empty() : tensor<1x64xf32>
+    %8475 = "ttir.relu"(%8473, %8474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8476 = tensor.empty() : tensor<1x64xf32>
+    %8477 = "ttir.relu"(%8475, %8476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8478 = tensor.empty() : tensor<1x64xf32>
+    %8479 = "ttir.relu"(%8477, %8478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8480 = tensor.empty() : tensor<1x64xf32>
+    %8481 = "ttir.relu"(%8479, %8480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8482 = tensor.empty() : tensor<1x64xf32>
+    %8483 = "ttir.relu"(%8481, %8482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8484 = tensor.empty() : tensor<1x64xf32>
+    %8485 = "ttir.relu"(%8483, %8484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8486 = tensor.empty() : tensor<1x64xf32>
+    %8487 = "ttir.relu"(%8485, %8486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8488 = tensor.empty() : tensor<1x64xf32>
+    %8489 = "ttir.relu"(%8487, %8488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8490 = tensor.empty() : tensor<1x64xf32>
+    %8491 = "ttir.relu"(%8489, %8490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8492 = tensor.empty() : tensor<1x64xf32>
+    %8493 = "ttir.relu"(%8491, %8492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8494 = tensor.empty() : tensor<1x64xf32>
+    %8495 = "ttir.relu"(%8493, %8494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8496 = tensor.empty() : tensor<1x64xf32>
+    %8497 = "ttir.relu"(%8495, %8496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8498 = tensor.empty() : tensor<1x64xf32>
+    %8499 = "ttir.relu"(%8497, %8498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8500 = tensor.empty() : tensor<1x64xf32>
+    %8501 = "ttir.relu"(%8499, %8500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8502 = tensor.empty() : tensor<1x64xf32>
+    %8503 = "ttir.relu"(%8501, %8502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8504 = tensor.empty() : tensor<1x64xf32>
+    %8505 = "ttir.relu"(%8503, %8504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8506 = tensor.empty() : tensor<1x64xf32>
+    %8507 = "ttir.relu"(%8505, %8506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8508 = tensor.empty() : tensor<1x64xf32>
+    %8509 = "ttir.relu"(%8507, %8508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8510 = tensor.empty() : tensor<1x64xf32>
+    %8511 = "ttir.relu"(%8509, %8510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8512 = tensor.empty() : tensor<1x64xf32>
+    %8513 = "ttir.relu"(%8511, %8512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8514 = tensor.empty() : tensor<1x64xf32>
+    %8515 = "ttir.relu"(%8513, %8514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8516 = tensor.empty() : tensor<1x64xf32>
+    %8517 = "ttir.relu"(%8515, %8516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8518 = tensor.empty() : tensor<1x64xf32>
+    %8519 = "ttir.relu"(%8517, %8518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8520 = tensor.empty() : tensor<1x64xf32>
+    %8521 = "ttir.relu"(%8519, %8520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8522 = tensor.empty() : tensor<1x64xf32>
+    %8523 = "ttir.relu"(%8521, %8522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8524 = tensor.empty() : tensor<1x64xf32>
+    %8525 = "ttir.relu"(%8523, %8524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8526 = tensor.empty() : tensor<1x64xf32>
+    %8527 = "ttir.relu"(%8525, %8526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8528 = tensor.empty() : tensor<1x64xf32>
+    %8529 = "ttir.relu"(%8527, %8528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8530 = tensor.empty() : tensor<1x64xf32>
+    %8531 = "ttir.relu"(%8529, %8530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8532 = tensor.empty() : tensor<1x64xf32>
+    %8533 = "ttir.relu"(%8531, %8532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8534 = tensor.empty() : tensor<1x64xf32>
+    %8535 = "ttir.relu"(%8533, %8534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8536 = tensor.empty() : tensor<1x64xf32>
+    %8537 = "ttir.relu"(%8535, %8536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8538 = tensor.empty() : tensor<1x64xf32>
+    %8539 = "ttir.relu"(%8537, %8538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8540 = tensor.empty() : tensor<1x64xf32>
+    %8541 = "ttir.relu"(%8539, %8540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8542 = tensor.empty() : tensor<1x64xf32>
+    %8543 = "ttir.relu"(%8541, %8542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8544 = tensor.empty() : tensor<1x64xf32>
+    %8545 = "ttir.relu"(%8543, %8544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8546 = tensor.empty() : tensor<1x64xf32>
+    %8547 = "ttir.relu"(%8545, %8546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8548 = tensor.empty() : tensor<1x64xf32>
+    %8549 = "ttir.relu"(%8547, %8548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8550 = tensor.empty() : tensor<1x64xf32>
+    %8551 = "ttir.relu"(%8549, %8550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8552 = tensor.empty() : tensor<1x64xf32>
+    %8553 = "ttir.relu"(%8551, %8552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8554 = tensor.empty() : tensor<1x64xf32>
+    %8555 = "ttir.relu"(%8553, %8554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8556 = tensor.empty() : tensor<1x64xf32>
+    %8557 = "ttir.relu"(%8555, %8556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8558 = tensor.empty() : tensor<1x64xf32>
+    %8559 = "ttir.relu"(%8557, %8558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8560 = tensor.empty() : tensor<1x64xf32>
+    %8561 = "ttir.relu"(%8559, %8560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8562 = tensor.empty() : tensor<1x64xf32>
+    %8563 = "ttir.relu"(%8561, %8562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8564 = tensor.empty() : tensor<1x64xf32>
+    %8565 = "ttir.relu"(%8563, %8564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8566 = tensor.empty() : tensor<1x64xf32>
+    %8567 = "ttir.relu"(%8565, %8566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8568 = tensor.empty() : tensor<1x64xf32>
+    %8569 = "ttir.relu"(%8567, %8568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8570 = tensor.empty() : tensor<1x64xf32>
+    %8571 = "ttir.relu"(%8569, %8570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8572 = tensor.empty() : tensor<1x64xf32>
+    %8573 = "ttir.relu"(%8571, %8572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8574 = tensor.empty() : tensor<1x64xf32>
+    %8575 = "ttir.relu"(%8573, %8574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8576 = tensor.empty() : tensor<1x64xf32>
+    %8577 = "ttir.relu"(%8575, %8576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8578 = tensor.empty() : tensor<1x64xf32>
+    %8579 = "ttir.relu"(%8577, %8578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8580 = tensor.empty() : tensor<1x64xf32>
+    %8581 = "ttir.relu"(%8579, %8580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8582 = tensor.empty() : tensor<1x64xf32>
+    %8583 = "ttir.relu"(%8581, %8582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8584 = tensor.empty() : tensor<1x64xf32>
+    %8585 = "ttir.relu"(%8583, %8584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8586 = tensor.empty() : tensor<1x64xf32>
+    %8587 = "ttir.relu"(%8585, %8586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8588 = tensor.empty() : tensor<1x64xf32>
+    %8589 = "ttir.relu"(%8587, %8588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8590 = tensor.empty() : tensor<1x64xf32>
+    %8591 = "ttir.relu"(%8589, %8590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8592 = tensor.empty() : tensor<1x64xf32>
+    %8593 = "ttir.relu"(%8591, %8592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8594 = tensor.empty() : tensor<1x64xf32>
+    %8595 = "ttir.relu"(%8593, %8594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8596 = tensor.empty() : tensor<1x64xf32>
+    %8597 = "ttir.relu"(%8595, %8596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8598 = tensor.empty() : tensor<1x64xf32>
+    %8599 = "ttir.relu"(%8597, %8598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8600 = tensor.empty() : tensor<1x64xf32>
+    %8601 = "ttir.relu"(%8599, %8600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8602 = tensor.empty() : tensor<1x64xf32>
+    %8603 = "ttir.relu"(%8601, %8602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8604 = tensor.empty() : tensor<1x64xf32>
+    %8605 = "ttir.relu"(%8603, %8604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8606 = tensor.empty() : tensor<1x64xf32>
+    %8607 = "ttir.relu"(%8605, %8606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8608 = tensor.empty() : tensor<1x64xf32>
+    %8609 = "ttir.relu"(%8607, %8608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8610 = tensor.empty() : tensor<1x64xf32>
+    %8611 = "ttir.relu"(%8609, %8610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8612 = tensor.empty() : tensor<1x64xf32>
+    %8613 = "ttir.relu"(%8611, %8612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8614 = tensor.empty() : tensor<1x64xf32>
+    %8615 = "ttir.relu"(%8613, %8614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8616 = tensor.empty() : tensor<1x64xf32>
+    %8617 = "ttir.relu"(%8615, %8616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8618 = tensor.empty() : tensor<1x64xf32>
+    %8619 = "ttir.relu"(%8617, %8618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8620 = tensor.empty() : tensor<1x64xf32>
+    %8621 = "ttir.relu"(%8619, %8620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8622 = tensor.empty() : tensor<1x64xf32>
+    %8623 = "ttir.relu"(%8621, %8622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8624 = tensor.empty() : tensor<1x64xf32>
+    %8625 = "ttir.relu"(%8623, %8624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8626 = tensor.empty() : tensor<1x64xf32>
+    %8627 = "ttir.relu"(%8625, %8626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8628 = tensor.empty() : tensor<1x64xf32>
+    %8629 = "ttir.relu"(%8627, %8628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8630 = tensor.empty() : tensor<1x64xf32>
+    %8631 = "ttir.relu"(%8629, %8630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8632 = tensor.empty() : tensor<1x64xf32>
+    %8633 = "ttir.relu"(%8631, %8632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8634 = tensor.empty() : tensor<1x64xf32>
+    %8635 = "ttir.relu"(%8633, %8634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8636 = tensor.empty() : tensor<1x64xf32>
+    %8637 = "ttir.relu"(%8635, %8636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8638 = tensor.empty() : tensor<1x64xf32>
+    %8639 = "ttir.relu"(%8637, %8638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8640 = tensor.empty() : tensor<1x64xf32>
+    %8641 = "ttir.relu"(%8639, %8640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8642 = tensor.empty() : tensor<1x64xf32>
+    %8643 = "ttir.relu"(%8641, %8642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8644 = tensor.empty() : tensor<1x64xf32>
+    %8645 = "ttir.relu"(%8643, %8644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8646 = tensor.empty() : tensor<1x64xf32>
+    %8647 = "ttir.relu"(%8645, %8646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8648 = tensor.empty() : tensor<1x64xf32>
+    %8649 = "ttir.relu"(%8647, %8648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8650 = tensor.empty() : tensor<1x64xf32>
+    %8651 = "ttir.relu"(%8649, %8650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8652 = tensor.empty() : tensor<1x64xf32>
+    %8653 = "ttir.relu"(%8651, %8652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8654 = tensor.empty() : tensor<1x64xf32>
+    %8655 = "ttir.relu"(%8653, %8654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8656 = tensor.empty() : tensor<1x64xf32>
+    %8657 = "ttir.relu"(%8655, %8656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8658 = tensor.empty() : tensor<1x64xf32>
+    %8659 = "ttir.relu"(%8657, %8658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8660 = tensor.empty() : tensor<1x64xf32>
+    %8661 = "ttir.relu"(%8659, %8660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8662 = tensor.empty() : tensor<1x64xf32>
+    %8663 = "ttir.relu"(%8661, %8662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8664 = tensor.empty() : tensor<1x64xf32>
+    %8665 = "ttir.relu"(%8663, %8664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8666 = tensor.empty() : tensor<1x64xf32>
+    %8667 = "ttir.relu"(%8665, %8666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8668 = tensor.empty() : tensor<1x64xf32>
+    %8669 = "ttir.relu"(%8667, %8668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8670 = tensor.empty() : tensor<1x64xf32>
+    %8671 = "ttir.relu"(%8669, %8670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8672 = tensor.empty() : tensor<1x64xf32>
+    %8673 = "ttir.relu"(%8671, %8672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8674 = tensor.empty() : tensor<1x64xf32>
+    %8675 = "ttir.relu"(%8673, %8674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8676 = tensor.empty() : tensor<1x64xf32>
+    %8677 = "ttir.relu"(%8675, %8676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8678 = tensor.empty() : tensor<1x64xf32>
+    %8679 = "ttir.relu"(%8677, %8678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8680 = tensor.empty() : tensor<1x64xf32>
+    %8681 = "ttir.relu"(%8679, %8680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8682 = tensor.empty() : tensor<1x64xf32>
+    %8683 = "ttir.relu"(%8681, %8682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8684 = tensor.empty() : tensor<1x64xf32>
+    %8685 = "ttir.relu"(%8683, %8684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8686 = tensor.empty() : tensor<1x64xf32>
+    %8687 = "ttir.relu"(%8685, %8686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8688 = tensor.empty() : tensor<1x64xf32>
+    %8689 = "ttir.relu"(%8687, %8688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8690 = tensor.empty() : tensor<1x64xf32>
+    %8691 = "ttir.relu"(%8689, %8690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8692 = tensor.empty() : tensor<1x64xf32>
+    %8693 = "ttir.relu"(%8691, %8692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8694 = tensor.empty() : tensor<1x64xf32>
+    %8695 = "ttir.relu"(%8693, %8694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8696 = tensor.empty() : tensor<1x64xf32>
+    %8697 = "ttir.relu"(%8695, %8696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8698 = tensor.empty() : tensor<1x64xf32>
+    %8699 = "ttir.relu"(%8697, %8698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8700 = tensor.empty() : tensor<1x64xf32>
+    %8701 = "ttir.relu"(%8699, %8700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8702 = tensor.empty() : tensor<1x64xf32>
+    %8703 = "ttir.relu"(%8701, %8702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8704 = tensor.empty() : tensor<1x64xf32>
+    %8705 = "ttir.relu"(%8703, %8704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8706 = tensor.empty() : tensor<1x64xf32>
+    %8707 = "ttir.relu"(%8705, %8706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8708 = tensor.empty() : tensor<1x64xf32>
+    %8709 = "ttir.relu"(%8707, %8708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8710 = tensor.empty() : tensor<1x64xf32>
+    %8711 = "ttir.relu"(%8709, %8710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8712 = tensor.empty() : tensor<1x64xf32>
+    %8713 = "ttir.relu"(%8711, %8712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8714 = tensor.empty() : tensor<1x64xf32>
+    %8715 = "ttir.relu"(%8713, %8714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8716 = tensor.empty() : tensor<1x64xf32>
+    %8717 = "ttir.relu"(%8715, %8716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8718 = tensor.empty() : tensor<1x64xf32>
+    %8719 = "ttir.relu"(%8717, %8718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8720 = tensor.empty() : tensor<1x64xf32>
+    %8721 = "ttir.relu"(%8719, %8720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8722 = tensor.empty() : tensor<1x64xf32>
+    %8723 = "ttir.relu"(%8721, %8722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8724 = tensor.empty() : tensor<1x64xf32>
+    %8725 = "ttir.relu"(%8723, %8724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8726 = tensor.empty() : tensor<1x64xf32>
+    %8727 = "ttir.relu"(%8725, %8726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8728 = tensor.empty() : tensor<1x64xf32>
+    %8729 = "ttir.relu"(%8727, %8728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8730 = tensor.empty() : tensor<1x64xf32>
+    %8731 = "ttir.relu"(%8729, %8730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8732 = tensor.empty() : tensor<1x64xf32>
+    %8733 = "ttir.relu"(%8731, %8732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8734 = tensor.empty() : tensor<1x64xf32>
+    %8735 = "ttir.relu"(%8733, %8734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8736 = tensor.empty() : tensor<1x64xf32>
+    %8737 = "ttir.relu"(%8735, %8736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8738 = tensor.empty() : tensor<1x64xf32>
+    %8739 = "ttir.relu"(%8737, %8738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8740 = tensor.empty() : tensor<1x64xf32>
+    %8741 = "ttir.relu"(%8739, %8740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8742 = tensor.empty() : tensor<1x64xf32>
+    %8743 = "ttir.relu"(%8741, %8742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8744 = tensor.empty() : tensor<1x64xf32>
+    %8745 = "ttir.relu"(%8743, %8744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8746 = tensor.empty() : tensor<1x64xf32>
+    %8747 = "ttir.relu"(%8745, %8746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8748 = tensor.empty() : tensor<1x64xf32>
+    %8749 = "ttir.relu"(%8747, %8748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8750 = tensor.empty() : tensor<1x64xf32>
+    %8751 = "ttir.relu"(%8749, %8750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8752 = tensor.empty() : tensor<1x64xf32>
+    %8753 = "ttir.relu"(%8751, %8752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8754 = tensor.empty() : tensor<1x64xf32>
+    %8755 = "ttir.relu"(%8753, %8754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8756 = tensor.empty() : tensor<1x64xf32>
+    %8757 = "ttir.relu"(%8755, %8756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8758 = tensor.empty() : tensor<1x64xf32>
+    %8759 = "ttir.relu"(%8757, %8758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8760 = tensor.empty() : tensor<1x64xf32>
+    %8761 = "ttir.relu"(%8759, %8760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8762 = tensor.empty() : tensor<1x64xf32>
+    %8763 = "ttir.relu"(%8761, %8762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8764 = tensor.empty() : tensor<1x64xf32>
+    %8765 = "ttir.relu"(%8763, %8764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8766 = tensor.empty() : tensor<1x64xf32>
+    %8767 = "ttir.relu"(%8765, %8766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8768 = tensor.empty() : tensor<1x64xf32>
+    %8769 = "ttir.relu"(%8767, %8768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8770 = tensor.empty() : tensor<1x64xf32>
+    %8771 = "ttir.relu"(%8769, %8770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8772 = tensor.empty() : tensor<1x64xf32>
+    %8773 = "ttir.relu"(%8771, %8772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8774 = tensor.empty() : tensor<1x64xf32>
+    %8775 = "ttir.relu"(%8773, %8774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8776 = tensor.empty() : tensor<1x64xf32>
+    %8777 = "ttir.relu"(%8775, %8776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8778 = tensor.empty() : tensor<1x64xf32>
+    %8779 = "ttir.relu"(%8777, %8778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8780 = tensor.empty() : tensor<1x64xf32>
+    %8781 = "ttir.relu"(%8779, %8780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8782 = tensor.empty() : tensor<1x64xf32>
+    %8783 = "ttir.relu"(%8781, %8782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8784 = tensor.empty() : tensor<1x64xf32>
+    %8785 = "ttir.relu"(%8783, %8784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8786 = tensor.empty() : tensor<1x64xf32>
+    %8787 = "ttir.relu"(%8785, %8786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8788 = tensor.empty() : tensor<1x64xf32>
+    %8789 = "ttir.relu"(%8787, %8788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8790 = tensor.empty() : tensor<1x64xf32>
+    %8791 = "ttir.relu"(%8789, %8790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8792 = tensor.empty() : tensor<1x64xf32>
+    %8793 = "ttir.relu"(%8791, %8792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8794 = tensor.empty() : tensor<1x64xf32>
+    %8795 = "ttir.relu"(%8793, %8794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8796 = tensor.empty() : tensor<1x64xf32>
+    %8797 = "ttir.relu"(%8795, %8796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8798 = tensor.empty() : tensor<1x64xf32>
+    %8799 = "ttir.relu"(%8797, %8798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8800 = tensor.empty() : tensor<1x64xf32>
+    %8801 = "ttir.relu"(%8799, %8800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8802 = tensor.empty() : tensor<1x64xf32>
+    %8803 = "ttir.relu"(%8801, %8802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8804 = tensor.empty() : tensor<1x64xf32>
+    %8805 = "ttir.relu"(%8803, %8804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8806 = tensor.empty() : tensor<1x64xf32>
+    %8807 = "ttir.relu"(%8805, %8806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8808 = tensor.empty() : tensor<1x64xf32>
+    %8809 = "ttir.relu"(%8807, %8808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8810 = tensor.empty() : tensor<1x64xf32>
+    %8811 = "ttir.relu"(%8809, %8810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8812 = tensor.empty() : tensor<1x64xf32>
+    %8813 = "ttir.relu"(%8811, %8812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8814 = tensor.empty() : tensor<1x64xf32>
+    %8815 = "ttir.relu"(%8813, %8814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8816 = tensor.empty() : tensor<1x64xf32>
+    %8817 = "ttir.relu"(%8815, %8816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8818 = tensor.empty() : tensor<1x64xf32>
+    %8819 = "ttir.relu"(%8817, %8818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8820 = tensor.empty() : tensor<1x64xf32>
+    %8821 = "ttir.relu"(%8819, %8820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8822 = tensor.empty() : tensor<1x64xf32>
+    %8823 = "ttir.relu"(%8821, %8822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8824 = tensor.empty() : tensor<1x64xf32>
+    %8825 = "ttir.relu"(%8823, %8824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8826 = tensor.empty() : tensor<1x64xf32>
+    %8827 = "ttir.relu"(%8825, %8826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8828 = tensor.empty() : tensor<1x64xf32>
+    %8829 = "ttir.relu"(%8827, %8828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8830 = tensor.empty() : tensor<1x64xf32>
+    %8831 = "ttir.relu"(%8829, %8830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8832 = tensor.empty() : tensor<1x64xf32>
+    %8833 = "ttir.relu"(%8831, %8832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8834 = tensor.empty() : tensor<1x64xf32>
+    %8835 = "ttir.relu"(%8833, %8834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8836 = tensor.empty() : tensor<1x64xf32>
+    %8837 = "ttir.relu"(%8835, %8836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8838 = tensor.empty() : tensor<1x64xf32>
+    %8839 = "ttir.relu"(%8837, %8838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8840 = tensor.empty() : tensor<1x64xf32>
+    %8841 = "ttir.relu"(%8839, %8840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8842 = tensor.empty() : tensor<1x64xf32>
+    %8843 = "ttir.relu"(%8841, %8842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8844 = tensor.empty() : tensor<1x64xf32>
+    %8845 = "ttir.relu"(%8843, %8844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8846 = tensor.empty() : tensor<1x64xf32>
+    %8847 = "ttir.relu"(%8845, %8846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8848 = tensor.empty() : tensor<1x64xf32>
+    %8849 = "ttir.relu"(%8847, %8848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8850 = tensor.empty() : tensor<1x64xf32>
+    %8851 = "ttir.relu"(%8849, %8850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8852 = tensor.empty() : tensor<1x64xf32>
+    %8853 = "ttir.relu"(%8851, %8852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8854 = tensor.empty() : tensor<1x64xf32>
+    %8855 = "ttir.relu"(%8853, %8854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8856 = tensor.empty() : tensor<1x64xf32>
+    %8857 = "ttir.relu"(%8855, %8856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8858 = tensor.empty() : tensor<1x64xf32>
+    %8859 = "ttir.relu"(%8857, %8858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8860 = tensor.empty() : tensor<1x64xf32>
+    %8861 = "ttir.relu"(%8859, %8860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8862 = tensor.empty() : tensor<1x64xf32>
+    %8863 = "ttir.relu"(%8861, %8862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8864 = tensor.empty() : tensor<1x64xf32>
+    %8865 = "ttir.relu"(%8863, %8864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8866 = tensor.empty() : tensor<1x64xf32>
+    %8867 = "ttir.relu"(%8865, %8866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8868 = tensor.empty() : tensor<1x64xf32>
+    %8869 = "ttir.relu"(%8867, %8868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8870 = tensor.empty() : tensor<1x64xf32>
+    %8871 = "ttir.relu"(%8869, %8870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8872 = tensor.empty() : tensor<1x64xf32>
+    %8873 = "ttir.relu"(%8871, %8872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8874 = tensor.empty() : tensor<1x64xf32>
+    %8875 = "ttir.relu"(%8873, %8874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8876 = tensor.empty() : tensor<1x64xf32>
+    %8877 = "ttir.relu"(%8875, %8876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8878 = tensor.empty() : tensor<1x64xf32>
+    %8879 = "ttir.relu"(%8877, %8878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8880 = tensor.empty() : tensor<1x64xf32>
+    %8881 = "ttir.relu"(%8879, %8880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8882 = tensor.empty() : tensor<1x64xf32>
+    %8883 = "ttir.relu"(%8881, %8882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8884 = tensor.empty() : tensor<1x64xf32>
+    %8885 = "ttir.relu"(%8883, %8884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8886 = tensor.empty() : tensor<1x64xf32>
+    %8887 = "ttir.relu"(%8885, %8886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8888 = tensor.empty() : tensor<1x64xf32>
+    %8889 = "ttir.relu"(%8887, %8888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8890 = tensor.empty() : tensor<1x64xf32>
+    %8891 = "ttir.relu"(%8889, %8890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8892 = tensor.empty() : tensor<1x64xf32>
+    %8893 = "ttir.relu"(%8891, %8892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8894 = tensor.empty() : tensor<1x64xf32>
+    %8895 = "ttir.relu"(%8893, %8894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8896 = tensor.empty() : tensor<1x64xf32>
+    %8897 = "ttir.relu"(%8895, %8896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8898 = tensor.empty() : tensor<1x64xf32>
+    %8899 = "ttir.relu"(%8897, %8898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8900 = tensor.empty() : tensor<1x64xf32>
+    %8901 = "ttir.relu"(%8899, %8900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8902 = tensor.empty() : tensor<1x64xf32>
+    %8903 = "ttir.relu"(%8901, %8902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8904 = tensor.empty() : tensor<1x64xf32>
+    %8905 = "ttir.relu"(%8903, %8904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8906 = tensor.empty() : tensor<1x64xf32>
+    %8907 = "ttir.relu"(%8905, %8906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8908 = tensor.empty() : tensor<1x64xf32>
+    %8909 = "ttir.relu"(%8907, %8908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8910 = tensor.empty() : tensor<1x64xf32>
+    %8911 = "ttir.relu"(%8909, %8910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8912 = tensor.empty() : tensor<1x64xf32>
+    %8913 = "ttir.relu"(%8911, %8912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8914 = tensor.empty() : tensor<1x64xf32>
+    %8915 = "ttir.relu"(%8913, %8914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8916 = tensor.empty() : tensor<1x64xf32>
+    %8917 = "ttir.relu"(%8915, %8916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8918 = tensor.empty() : tensor<1x64xf32>
+    %8919 = "ttir.relu"(%8917, %8918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8920 = tensor.empty() : tensor<1x64xf32>
+    %8921 = "ttir.relu"(%8919, %8920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8922 = tensor.empty() : tensor<1x64xf32>
+    %8923 = "ttir.relu"(%8921, %8922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8924 = tensor.empty() : tensor<1x64xf32>
+    %8925 = "ttir.relu"(%8923, %8924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8926 = tensor.empty() : tensor<1x64xf32>
+    %8927 = "ttir.relu"(%8925, %8926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8928 = tensor.empty() : tensor<1x64xf32>
+    %8929 = "ttir.relu"(%8927, %8928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8930 = tensor.empty() : tensor<1x64xf32>
+    %8931 = "ttir.relu"(%8929, %8930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8932 = tensor.empty() : tensor<1x64xf32>
+    %8933 = "ttir.relu"(%8931, %8932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8934 = tensor.empty() : tensor<1x64xf32>
+    %8935 = "ttir.relu"(%8933, %8934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8936 = tensor.empty() : tensor<1x64xf32>
+    %8937 = "ttir.relu"(%8935, %8936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8938 = tensor.empty() : tensor<1x64xf32>
+    %8939 = "ttir.relu"(%8937, %8938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8940 = tensor.empty() : tensor<1x64xf32>
+    %8941 = "ttir.relu"(%8939, %8940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8942 = tensor.empty() : tensor<1x64xf32>
+    %8943 = "ttir.relu"(%8941, %8942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8944 = tensor.empty() : tensor<1x64xf32>
+    %8945 = "ttir.relu"(%8943, %8944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8946 = tensor.empty() : tensor<1x64xf32>
+    %8947 = "ttir.relu"(%8945, %8946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8948 = tensor.empty() : tensor<1x64xf32>
+    %8949 = "ttir.relu"(%8947, %8948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8950 = tensor.empty() : tensor<1x64xf32>
+    %8951 = "ttir.relu"(%8949, %8950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8952 = tensor.empty() : tensor<1x64xf32>
+    %8953 = "ttir.relu"(%8951, %8952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8954 = tensor.empty() : tensor<1x64xf32>
+    %8955 = "ttir.relu"(%8953, %8954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8956 = tensor.empty() : tensor<1x64xf32>
+    %8957 = "ttir.relu"(%8955, %8956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8958 = tensor.empty() : tensor<1x64xf32>
+    %8959 = "ttir.relu"(%8957, %8958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8960 = tensor.empty() : tensor<1x64xf32>
+    %8961 = "ttir.relu"(%8959, %8960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8962 = tensor.empty() : tensor<1x64xf32>
+    %8963 = "ttir.relu"(%8961, %8962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8964 = tensor.empty() : tensor<1x64xf32>
+    %8965 = "ttir.relu"(%8963, %8964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8966 = tensor.empty() : tensor<1x64xf32>
+    %8967 = "ttir.relu"(%8965, %8966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8968 = tensor.empty() : tensor<1x64xf32>
+    %8969 = "ttir.relu"(%8967, %8968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8970 = tensor.empty() : tensor<1x64xf32>
+    %8971 = "ttir.relu"(%8969, %8970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8972 = tensor.empty() : tensor<1x64xf32>
+    %8973 = "ttir.relu"(%8971, %8972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8974 = tensor.empty() : tensor<1x64xf32>
+    %8975 = "ttir.relu"(%8973, %8974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8976 = tensor.empty() : tensor<1x64xf32>
+    %8977 = "ttir.relu"(%8975, %8976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8978 = tensor.empty() : tensor<1x64xf32>
+    %8979 = "ttir.relu"(%8977, %8978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8980 = tensor.empty() : tensor<1x64xf32>
+    %8981 = "ttir.relu"(%8979, %8980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8982 = tensor.empty() : tensor<1x64xf32>
+    %8983 = "ttir.relu"(%8981, %8982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8984 = tensor.empty() : tensor<1x64xf32>
+    %8985 = "ttir.relu"(%8983, %8984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8986 = tensor.empty() : tensor<1x64xf32>
+    %8987 = "ttir.relu"(%8985, %8986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8988 = tensor.empty() : tensor<1x64xf32>
+    %8989 = "ttir.relu"(%8987, %8988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8990 = tensor.empty() : tensor<1x64xf32>
+    %8991 = "ttir.relu"(%8989, %8990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8992 = tensor.empty() : tensor<1x64xf32>
+    %8993 = "ttir.relu"(%8991, %8992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8994 = tensor.empty() : tensor<1x64xf32>
+    %8995 = "ttir.relu"(%8993, %8994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8996 = tensor.empty() : tensor<1x64xf32>
+    %8997 = "ttir.relu"(%8995, %8996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8998 = tensor.empty() : tensor<1x64xf32>
+    %8999 = "ttir.relu"(%8997, %8998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9000 = tensor.empty() : tensor<1x64xf32>
+    %9001 = "ttir.relu"(%8999, %9000) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9002 = tensor.empty() : tensor<1x64xf32>
+    %9003 = "ttir.relu"(%9001, %9002) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9004 = tensor.empty() : tensor<1x64xf32>
+    %9005 = "ttir.relu"(%9003, %9004) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9006 = tensor.empty() : tensor<1x64xf32>
+    %9007 = "ttir.relu"(%9005, %9006) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9008 = tensor.empty() : tensor<1x64xf32>
+    %9009 = "ttir.relu"(%9007, %9008) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9010 = tensor.empty() : tensor<1x64xf32>
+    %9011 = "ttir.relu"(%9009, %9010) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9012 = tensor.empty() : tensor<1x64xf32>
+    %9013 = "ttir.relu"(%9011, %9012) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9014 = tensor.empty() : tensor<1x64xf32>
+    %9015 = "ttir.relu"(%9013, %9014) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9016 = tensor.empty() : tensor<1x64xf32>
+    %9017 = "ttir.relu"(%9015, %9016) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9018 = tensor.empty() : tensor<1x64xf32>
+    %9019 = "ttir.relu"(%9017, %9018) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9020 = tensor.empty() : tensor<1x64xf32>
+    %9021 = "ttir.relu"(%9019, %9020) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9022 = tensor.empty() : tensor<1x64xf32>
+    %9023 = "ttir.relu"(%9021, %9022) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9024 = tensor.empty() : tensor<1x64xf32>
+    %9025 = "ttir.relu"(%9023, %9024) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9026 = tensor.empty() : tensor<1x64xf32>
+    %9027 = "ttir.relu"(%9025, %9026) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9028 = tensor.empty() : tensor<1x64xf32>
+    %9029 = "ttir.relu"(%9027, %9028) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9030 = tensor.empty() : tensor<1x64xf32>
+    %9031 = "ttir.relu"(%9029, %9030) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9032 = tensor.empty() : tensor<1x64xf32>
+    %9033 = "ttir.relu"(%9031, %9032) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9034 = tensor.empty() : tensor<1x64xf32>
+    %9035 = "ttir.relu"(%9033, %9034) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9036 = tensor.empty() : tensor<1x64xf32>
+    %9037 = "ttir.relu"(%9035, %9036) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9038 = tensor.empty() : tensor<1x64xf32>
+    %9039 = "ttir.relu"(%9037, %9038) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9040 = tensor.empty() : tensor<1x64xf32>
+    %9041 = "ttir.relu"(%9039, %9040) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9042 = tensor.empty() : tensor<1x64xf32>
+    %9043 = "ttir.relu"(%9041, %9042) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9044 = tensor.empty() : tensor<1x64xf32>
+    %9045 = "ttir.relu"(%9043, %9044) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9046 = tensor.empty() : tensor<1x64xf32>
+    %9047 = "ttir.relu"(%9045, %9046) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9048 = tensor.empty() : tensor<1x64xf32>
+    %9049 = "ttir.relu"(%9047, %9048) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9050 = tensor.empty() : tensor<1x64xf32>
+    %9051 = "ttir.relu"(%9049, %9050) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9052 = tensor.empty() : tensor<1x64xf32>
+    %9053 = "ttir.relu"(%9051, %9052) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9054 = tensor.empty() : tensor<1x64xf32>
+    %9055 = "ttir.relu"(%9053, %9054) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9056 = tensor.empty() : tensor<1x64xf32>
+    %9057 = "ttir.relu"(%9055, %9056) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9058 = tensor.empty() : tensor<1x64xf32>
+    %9059 = "ttir.relu"(%9057, %9058) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9060 = tensor.empty() : tensor<1x64xf32>
+    %9061 = "ttir.relu"(%9059, %9060) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9062 = tensor.empty() : tensor<1x64xf32>
+    %9063 = "ttir.relu"(%9061, %9062) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9064 = tensor.empty() : tensor<1x64xf32>
+    %9065 = "ttir.relu"(%9063, %9064) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9066 = tensor.empty() : tensor<1x64xf32>
+    %9067 = "ttir.relu"(%9065, %9066) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9068 = tensor.empty() : tensor<1x64xf32>
+    %9069 = "ttir.relu"(%9067, %9068) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9070 = tensor.empty() : tensor<1x64xf32>
+    %9071 = "ttir.relu"(%9069, %9070) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9072 = tensor.empty() : tensor<1x64xf32>
+    %9073 = "ttir.relu"(%9071, %9072) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9074 = tensor.empty() : tensor<1x64xf32>
+    %9075 = "ttir.relu"(%9073, %9074) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9076 = tensor.empty() : tensor<1x64xf32>
+    %9077 = "ttir.relu"(%9075, %9076) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9078 = tensor.empty() : tensor<1x64xf32>
+    %9079 = "ttir.relu"(%9077, %9078) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9080 = tensor.empty() : tensor<1x64xf32>
+    %9081 = "ttir.relu"(%9079, %9080) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9082 = tensor.empty() : tensor<1x64xf32>
+    %9083 = "ttir.relu"(%9081, %9082) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9084 = tensor.empty() : tensor<1x64xf32>
+    %9085 = "ttir.relu"(%9083, %9084) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9086 = tensor.empty() : tensor<1x64xf32>
+    %9087 = "ttir.relu"(%9085, %9086) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9088 = tensor.empty() : tensor<1x64xf32>
+    %9089 = "ttir.relu"(%9087, %9088) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9090 = tensor.empty() : tensor<1x64xf32>
+    %9091 = "ttir.relu"(%9089, %9090) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9092 = tensor.empty() : tensor<1x64xf32>
+    %9093 = "ttir.relu"(%9091, %9092) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9094 = tensor.empty() : tensor<1x64xf32>
+    %9095 = "ttir.relu"(%9093, %9094) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9096 = tensor.empty() : tensor<1x64xf32>
+    %9097 = "ttir.relu"(%9095, %9096) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9098 = tensor.empty() : tensor<1x64xf32>
+    %9099 = "ttir.relu"(%9097, %9098) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9100 = tensor.empty() : tensor<1x64xf32>
+    %9101 = "ttir.relu"(%9099, %9100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9102 = tensor.empty() : tensor<1x64xf32>
+    %9103 = "ttir.relu"(%9101, %9102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9104 = tensor.empty() : tensor<1x64xf32>
+    %9105 = "ttir.relu"(%9103, %9104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9106 = tensor.empty() : tensor<1x64xf32>
+    %9107 = "ttir.relu"(%9105, %9106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9108 = tensor.empty() : tensor<1x64xf32>
+    %9109 = "ttir.relu"(%9107, %9108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9110 = tensor.empty() : tensor<1x64xf32>
+    %9111 = "ttir.relu"(%9109, %9110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9112 = tensor.empty() : tensor<1x64xf32>
+    %9113 = "ttir.relu"(%9111, %9112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9114 = tensor.empty() : tensor<1x64xf32>
+    %9115 = "ttir.relu"(%9113, %9114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9116 = tensor.empty() : tensor<1x64xf32>
+    %9117 = "ttir.relu"(%9115, %9116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9118 = tensor.empty() : tensor<1x64xf32>
+    %9119 = "ttir.relu"(%9117, %9118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9120 = tensor.empty() : tensor<1x64xf32>
+    %9121 = "ttir.relu"(%9119, %9120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9122 = tensor.empty() : tensor<1x64xf32>
+    %9123 = "ttir.relu"(%9121, %9122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9124 = tensor.empty() : tensor<1x64xf32>
+    %9125 = "ttir.relu"(%9123, %9124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9126 = tensor.empty() : tensor<1x64xf32>
+    %9127 = "ttir.relu"(%9125, %9126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9128 = tensor.empty() : tensor<1x64xf32>
+    %9129 = "ttir.relu"(%9127, %9128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9130 = tensor.empty() : tensor<1x64xf32>
+    %9131 = "ttir.relu"(%9129, %9130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9132 = tensor.empty() : tensor<1x64xf32>
+    %9133 = "ttir.relu"(%9131, %9132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9134 = tensor.empty() : tensor<1x64xf32>
+    %9135 = "ttir.relu"(%9133, %9134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9136 = tensor.empty() : tensor<1x64xf32>
+    %9137 = "ttir.relu"(%9135, %9136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9138 = tensor.empty() : tensor<1x64xf32>
+    %9139 = "ttir.relu"(%9137, %9138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9140 = tensor.empty() : tensor<1x64xf32>
+    %9141 = "ttir.relu"(%9139, %9140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9142 = tensor.empty() : tensor<1x64xf32>
+    %9143 = "ttir.relu"(%9141, %9142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9144 = tensor.empty() : tensor<1x64xf32>
+    %9145 = "ttir.relu"(%9143, %9144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9146 = tensor.empty() : tensor<1x64xf32>
+    %9147 = "ttir.relu"(%9145, %9146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9148 = tensor.empty() : tensor<1x64xf32>
+    %9149 = "ttir.relu"(%9147, %9148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9150 = tensor.empty() : tensor<1x64xf32>
+    %9151 = "ttir.relu"(%9149, %9150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9152 = tensor.empty() : tensor<1x64xf32>
+    %9153 = "ttir.relu"(%9151, %9152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9154 = tensor.empty() : tensor<1x64xf32>
+    %9155 = "ttir.relu"(%9153, %9154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9156 = tensor.empty() : tensor<1x64xf32>
+    %9157 = "ttir.relu"(%9155, %9156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9158 = tensor.empty() : tensor<1x64xf32>
+    %9159 = "ttir.relu"(%9157, %9158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9160 = tensor.empty() : tensor<1x64xf32>
+    %9161 = "ttir.relu"(%9159, %9160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9162 = tensor.empty() : tensor<1x64xf32>
+    %9163 = "ttir.relu"(%9161, %9162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9164 = tensor.empty() : tensor<1x64xf32>
+    %9165 = "ttir.relu"(%9163, %9164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9166 = tensor.empty() : tensor<1x64xf32>
+    %9167 = "ttir.relu"(%9165, %9166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9168 = tensor.empty() : tensor<1x64xf32>
+    %9169 = "ttir.relu"(%9167, %9168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9170 = tensor.empty() : tensor<1x64xf32>
+    %9171 = "ttir.relu"(%9169, %9170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9172 = tensor.empty() : tensor<1x64xf32>
+    %9173 = "ttir.relu"(%9171, %9172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9174 = tensor.empty() : tensor<1x64xf32>
+    %9175 = "ttir.relu"(%9173, %9174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9176 = tensor.empty() : tensor<1x64xf32>
+    %9177 = "ttir.relu"(%9175, %9176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9178 = tensor.empty() : tensor<1x64xf32>
+    %9179 = "ttir.relu"(%9177, %9178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9180 = tensor.empty() : tensor<1x64xf32>
+    %9181 = "ttir.relu"(%9179, %9180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9182 = tensor.empty() : tensor<1x64xf32>
+    %9183 = "ttir.relu"(%9181, %9182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9184 = tensor.empty() : tensor<1x64xf32>
+    %9185 = "ttir.relu"(%9183, %9184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9186 = tensor.empty() : tensor<1x64xf32>
+    %9187 = "ttir.relu"(%9185, %9186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9188 = tensor.empty() : tensor<1x64xf32>
+    %9189 = "ttir.relu"(%9187, %9188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9190 = tensor.empty() : tensor<1x64xf32>
+    %9191 = "ttir.relu"(%9189, %9190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9192 = tensor.empty() : tensor<1x64xf32>
+    %9193 = "ttir.relu"(%9191, %9192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9194 = tensor.empty() : tensor<1x64xf32>
+    %9195 = "ttir.relu"(%9193, %9194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9196 = tensor.empty() : tensor<1x64xf32>
+    %9197 = "ttir.relu"(%9195, %9196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9198 = tensor.empty() : tensor<1x64xf32>
+    %9199 = "ttir.relu"(%9197, %9198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9200 = tensor.empty() : tensor<1x64xf32>
+    %9201 = "ttir.relu"(%9199, %9200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9202 = tensor.empty() : tensor<1x64xf32>
+    %9203 = "ttir.relu"(%9201, %9202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9204 = tensor.empty() : tensor<1x64xf32>
+    %9205 = "ttir.relu"(%9203, %9204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9206 = tensor.empty() : tensor<1x64xf32>
+    %9207 = "ttir.relu"(%9205, %9206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9208 = tensor.empty() : tensor<1x64xf32>
+    %9209 = "ttir.relu"(%9207, %9208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9210 = tensor.empty() : tensor<1x64xf32>
+    %9211 = "ttir.relu"(%9209, %9210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9212 = tensor.empty() : tensor<1x64xf32>
+    %9213 = "ttir.relu"(%9211, %9212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9214 = tensor.empty() : tensor<1x64xf32>
+    %9215 = "ttir.relu"(%9213, %9214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9216 = tensor.empty() : tensor<1x64xf32>
+    %9217 = "ttir.relu"(%9215, %9216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9218 = tensor.empty() : tensor<1x64xf32>
+    %9219 = "ttir.relu"(%9217, %9218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9220 = tensor.empty() : tensor<1x64xf32>
+    %9221 = "ttir.relu"(%9219, %9220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9222 = tensor.empty() : tensor<1x64xf32>
+    %9223 = "ttir.relu"(%9221, %9222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9224 = tensor.empty() : tensor<1x64xf32>
+    %9225 = "ttir.relu"(%9223, %9224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9226 = tensor.empty() : tensor<1x64xf32>
+    %9227 = "ttir.relu"(%9225, %9226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9228 = tensor.empty() : tensor<1x64xf32>
+    %9229 = "ttir.relu"(%9227, %9228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9230 = tensor.empty() : tensor<1x64xf32>
+    %9231 = "ttir.relu"(%9229, %9230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9232 = tensor.empty() : tensor<1x64xf32>
+    %9233 = "ttir.relu"(%9231, %9232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9234 = tensor.empty() : tensor<1x64xf32>
+    %9235 = "ttir.relu"(%9233, %9234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9236 = tensor.empty() : tensor<1x64xf32>
+    %9237 = "ttir.relu"(%9235, %9236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9238 = tensor.empty() : tensor<1x64xf32>
+    %9239 = "ttir.relu"(%9237, %9238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9240 = tensor.empty() : tensor<1x64xf32>
+    %9241 = "ttir.relu"(%9239, %9240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9242 = tensor.empty() : tensor<1x64xf32>
+    %9243 = "ttir.relu"(%9241, %9242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9244 = tensor.empty() : tensor<1x64xf32>
+    %9245 = "ttir.relu"(%9243, %9244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9246 = tensor.empty() : tensor<1x64xf32>
+    %9247 = "ttir.relu"(%9245, %9246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9248 = tensor.empty() : tensor<1x64xf32>
+    %9249 = "ttir.relu"(%9247, %9248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9250 = tensor.empty() : tensor<1x64xf32>
+    %9251 = "ttir.relu"(%9249, %9250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9252 = tensor.empty() : tensor<1x64xf32>
+    %9253 = "ttir.relu"(%9251, %9252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9254 = tensor.empty() : tensor<1x64xf32>
+    %9255 = "ttir.relu"(%9253, %9254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9256 = tensor.empty() : tensor<1x64xf32>
+    %9257 = "ttir.relu"(%9255, %9256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9258 = tensor.empty() : tensor<1x64xf32>
+    %9259 = "ttir.relu"(%9257, %9258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9260 = tensor.empty() : tensor<1x64xf32>
+    %9261 = "ttir.relu"(%9259, %9260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9262 = tensor.empty() : tensor<1x64xf32>
+    %9263 = "ttir.relu"(%9261, %9262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9264 = tensor.empty() : tensor<1x64xf32>
+    %9265 = "ttir.relu"(%9263, %9264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9266 = tensor.empty() : tensor<1x64xf32>
+    %9267 = "ttir.relu"(%9265, %9266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9268 = tensor.empty() : tensor<1x64xf32>
+    %9269 = "ttir.relu"(%9267, %9268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9270 = tensor.empty() : tensor<1x64xf32>
+    %9271 = "ttir.relu"(%9269, %9270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9272 = tensor.empty() : tensor<1x64xf32>
+    %9273 = "ttir.relu"(%9271, %9272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9274 = tensor.empty() : tensor<1x64xf32>
+    %9275 = "ttir.relu"(%9273, %9274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9276 = tensor.empty() : tensor<1x64xf32>
+    %9277 = "ttir.relu"(%9275, %9276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9278 = tensor.empty() : tensor<1x64xf32>
+    %9279 = "ttir.relu"(%9277, %9278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9280 = tensor.empty() : tensor<1x64xf32>
+    %9281 = "ttir.relu"(%9279, %9280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9282 = tensor.empty() : tensor<1x64xf32>
+    %9283 = "ttir.relu"(%9281, %9282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9284 = tensor.empty() : tensor<1x64xf32>
+    %9285 = "ttir.relu"(%9283, %9284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9286 = tensor.empty() : tensor<1x64xf32>
+    %9287 = "ttir.relu"(%9285, %9286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9288 = tensor.empty() : tensor<1x64xf32>
+    %9289 = "ttir.relu"(%9287, %9288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9290 = tensor.empty() : tensor<1x64xf32>
+    %9291 = "ttir.relu"(%9289, %9290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9292 = tensor.empty() : tensor<1x64xf32>
+    %9293 = "ttir.relu"(%9291, %9292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9294 = tensor.empty() : tensor<1x64xf32>
+    %9295 = "ttir.relu"(%9293, %9294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9296 = tensor.empty() : tensor<1x64xf32>
+    %9297 = "ttir.relu"(%9295, %9296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9298 = tensor.empty() : tensor<1x64xf32>
+    %9299 = "ttir.relu"(%9297, %9298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9300 = tensor.empty() : tensor<1x64xf32>
+    %9301 = "ttir.relu"(%9299, %9300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9302 = tensor.empty() : tensor<1x64xf32>
+    %9303 = "ttir.relu"(%9301, %9302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9304 = tensor.empty() : tensor<1x64xf32>
+    %9305 = "ttir.relu"(%9303, %9304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9306 = tensor.empty() : tensor<1x64xf32>
+    %9307 = "ttir.relu"(%9305, %9306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9308 = tensor.empty() : tensor<1x64xf32>
+    %9309 = "ttir.relu"(%9307, %9308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9310 = tensor.empty() : tensor<1x64xf32>
+    %9311 = "ttir.relu"(%9309, %9310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9312 = tensor.empty() : tensor<1x64xf32>
+    %9313 = "ttir.relu"(%9311, %9312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9314 = tensor.empty() : tensor<1x64xf32>
+    %9315 = "ttir.relu"(%9313, %9314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9316 = tensor.empty() : tensor<1x64xf32>
+    %9317 = "ttir.relu"(%9315, %9316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9318 = tensor.empty() : tensor<1x64xf32>
+    %9319 = "ttir.relu"(%9317, %9318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9320 = tensor.empty() : tensor<1x64xf32>
+    %9321 = "ttir.relu"(%9319, %9320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9322 = tensor.empty() : tensor<1x64xf32>
+    %9323 = "ttir.relu"(%9321, %9322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9324 = tensor.empty() : tensor<1x64xf32>
+    %9325 = "ttir.relu"(%9323, %9324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9326 = tensor.empty() : tensor<1x64xf32>
+    %9327 = "ttir.relu"(%9325, %9326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9328 = tensor.empty() : tensor<1x64xf32>
+    %9329 = "ttir.relu"(%9327, %9328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9330 = tensor.empty() : tensor<1x64xf32>
+    %9331 = "ttir.relu"(%9329, %9330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9332 = tensor.empty() : tensor<1x64xf32>
+    %9333 = "ttir.relu"(%9331, %9332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9334 = tensor.empty() : tensor<1x64xf32>
+    %9335 = "ttir.relu"(%9333, %9334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9336 = tensor.empty() : tensor<1x64xf32>
+    %9337 = "ttir.relu"(%9335, %9336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9338 = tensor.empty() : tensor<1x64xf32>
+    %9339 = "ttir.relu"(%9337, %9338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9340 = tensor.empty() : tensor<1x64xf32>
+    %9341 = "ttir.relu"(%9339, %9340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9342 = tensor.empty() : tensor<1x64xf32>
+    %9343 = "ttir.relu"(%9341, %9342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9344 = tensor.empty() : tensor<1x64xf32>
+    %9345 = "ttir.relu"(%9343, %9344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9346 = tensor.empty() : tensor<1x64xf32>
+    %9347 = "ttir.relu"(%9345, %9346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9348 = tensor.empty() : tensor<1x64xf32>
+    %9349 = "ttir.relu"(%9347, %9348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9350 = tensor.empty() : tensor<1x64xf32>
+    %9351 = "ttir.relu"(%9349, %9350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9352 = tensor.empty() : tensor<1x64xf32>
+    %9353 = "ttir.relu"(%9351, %9352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9354 = tensor.empty() : tensor<1x64xf32>
+    %9355 = "ttir.relu"(%9353, %9354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9356 = tensor.empty() : tensor<1x64xf32>
+    %9357 = "ttir.relu"(%9355, %9356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9358 = tensor.empty() : tensor<1x64xf32>
+    %9359 = "ttir.relu"(%9357, %9358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9360 = tensor.empty() : tensor<1x64xf32>
+    %9361 = "ttir.relu"(%9359, %9360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9362 = tensor.empty() : tensor<1x64xf32>
+    %9363 = "ttir.relu"(%9361, %9362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9364 = tensor.empty() : tensor<1x64xf32>
+    %9365 = "ttir.relu"(%9363, %9364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9366 = tensor.empty() : tensor<1x64xf32>
+    %9367 = "ttir.relu"(%9365, %9366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9368 = tensor.empty() : tensor<1x64xf32>
+    %9369 = "ttir.relu"(%9367, %9368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9370 = tensor.empty() : tensor<1x64xf32>
+    %9371 = "ttir.relu"(%9369, %9370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9372 = tensor.empty() : tensor<1x64xf32>
+    %9373 = "ttir.relu"(%9371, %9372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9374 = tensor.empty() : tensor<1x64xf32>
+    %9375 = "ttir.relu"(%9373, %9374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9376 = tensor.empty() : tensor<1x64xf32>
+    %9377 = "ttir.relu"(%9375, %9376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9378 = tensor.empty() : tensor<1x64xf32>
+    %9379 = "ttir.relu"(%9377, %9378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9380 = tensor.empty() : tensor<1x64xf32>
+    %9381 = "ttir.relu"(%9379, %9380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9382 = tensor.empty() : tensor<1x64xf32>
+    %9383 = "ttir.relu"(%9381, %9382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9384 = tensor.empty() : tensor<1x64xf32>
+    %9385 = "ttir.relu"(%9383, %9384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9386 = tensor.empty() : tensor<1x64xf32>
+    %9387 = "ttir.relu"(%9385, %9386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9388 = tensor.empty() : tensor<1x64xf32>
+    %9389 = "ttir.relu"(%9387, %9388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9390 = tensor.empty() : tensor<1x64xf32>
+    %9391 = "ttir.relu"(%9389, %9390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9392 = tensor.empty() : tensor<1x64xf32>
+    %9393 = "ttir.relu"(%9391, %9392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9394 = tensor.empty() : tensor<1x64xf32>
+    %9395 = "ttir.relu"(%9393, %9394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9396 = tensor.empty() : tensor<1x64xf32>
+    %9397 = "ttir.relu"(%9395, %9396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9398 = tensor.empty() : tensor<1x64xf32>
+    %9399 = "ttir.relu"(%9397, %9398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9400 = tensor.empty() : tensor<1x64xf32>
+    %9401 = "ttir.relu"(%9399, %9400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9402 = tensor.empty() : tensor<1x64xf32>
+    %9403 = "ttir.relu"(%9401, %9402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9404 = tensor.empty() : tensor<1x64xf32>
+    %9405 = "ttir.relu"(%9403, %9404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9406 = tensor.empty() : tensor<1x64xf32>
+    %9407 = "ttir.relu"(%9405, %9406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9408 = tensor.empty() : tensor<1x64xf32>
+    %9409 = "ttir.relu"(%9407, %9408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9410 = tensor.empty() : tensor<1x64xf32>
+    %9411 = "ttir.relu"(%9409, %9410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9412 = tensor.empty() : tensor<1x64xf32>
+    %9413 = "ttir.relu"(%9411, %9412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9414 = tensor.empty() : tensor<1x64xf32>
+    %9415 = "ttir.relu"(%9413, %9414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9416 = tensor.empty() : tensor<1x64xf32>
+    %9417 = "ttir.relu"(%9415, %9416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9418 = tensor.empty() : tensor<1x64xf32>
+    %9419 = "ttir.relu"(%9417, %9418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9420 = tensor.empty() : tensor<1x64xf32>
+    %9421 = "ttir.relu"(%9419, %9420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9422 = tensor.empty() : tensor<1x64xf32>
+    %9423 = "ttir.relu"(%9421, %9422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9424 = tensor.empty() : tensor<1x64xf32>
+    %9425 = "ttir.relu"(%9423, %9424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9426 = tensor.empty() : tensor<1x64xf32>
+    %9427 = "ttir.relu"(%9425, %9426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9428 = tensor.empty() : tensor<1x64xf32>
+    %9429 = "ttir.relu"(%9427, %9428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9430 = tensor.empty() : tensor<1x64xf32>
+    %9431 = "ttir.relu"(%9429, %9430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9432 = tensor.empty() : tensor<1x64xf32>
+    %9433 = "ttir.relu"(%9431, %9432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9434 = tensor.empty() : tensor<1x64xf32>
+    %9435 = "ttir.relu"(%9433, %9434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9436 = tensor.empty() : tensor<1x64xf32>
+    %9437 = "ttir.relu"(%9435, %9436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9438 = tensor.empty() : tensor<1x64xf32>
+    %9439 = "ttir.relu"(%9437, %9438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9440 = tensor.empty() : tensor<1x64xf32>
+    %9441 = "ttir.relu"(%9439, %9440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9442 = tensor.empty() : tensor<1x64xf32>
+    %9443 = "ttir.relu"(%9441, %9442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9444 = tensor.empty() : tensor<1x64xf32>
+    %9445 = "ttir.relu"(%9443, %9444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9446 = tensor.empty() : tensor<1x64xf32>
+    %9447 = "ttir.relu"(%9445, %9446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9448 = tensor.empty() : tensor<1x64xf32>
+    %9449 = "ttir.relu"(%9447, %9448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9450 = tensor.empty() : tensor<1x64xf32>
+    %9451 = "ttir.relu"(%9449, %9450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9452 = tensor.empty() : tensor<1x64xf32>
+    %9453 = "ttir.relu"(%9451, %9452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9454 = tensor.empty() : tensor<1x64xf32>
+    %9455 = "ttir.relu"(%9453, %9454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9456 = tensor.empty() : tensor<1x64xf32>
+    %9457 = "ttir.relu"(%9455, %9456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9458 = tensor.empty() : tensor<1x64xf32>
+    %9459 = "ttir.relu"(%9457, %9458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9460 = tensor.empty() : tensor<1x64xf32>
+    %9461 = "ttir.relu"(%9459, %9460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9462 = tensor.empty() : tensor<1x64xf32>
+    %9463 = "ttir.relu"(%9461, %9462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9464 = tensor.empty() : tensor<1x64xf32>
+    %9465 = "ttir.relu"(%9463, %9464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9466 = tensor.empty() : tensor<1x64xf32>
+    %9467 = "ttir.relu"(%9465, %9466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9468 = tensor.empty() : tensor<1x64xf32>
+    %9469 = "ttir.relu"(%9467, %9468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9470 = tensor.empty() : tensor<1x64xf32>
+    %9471 = "ttir.relu"(%9469, %9470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9472 = tensor.empty() : tensor<1x64xf32>
+    %9473 = "ttir.relu"(%9471, %9472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9474 = tensor.empty() : tensor<1x64xf32>
+    %9475 = "ttir.relu"(%9473, %9474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9476 = tensor.empty() : tensor<1x64xf32>
+    %9477 = "ttir.relu"(%9475, %9476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9478 = tensor.empty() : tensor<1x64xf32>
+    %9479 = "ttir.relu"(%9477, %9478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9480 = tensor.empty() : tensor<1x64xf32>
+    %9481 = "ttir.relu"(%9479, %9480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9482 = tensor.empty() : tensor<1x64xf32>
+    %9483 = "ttir.relu"(%9481, %9482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9484 = tensor.empty() : tensor<1x64xf32>
+    %9485 = "ttir.relu"(%9483, %9484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9486 = tensor.empty() : tensor<1x64xf32>
+    %9487 = "ttir.relu"(%9485, %9486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9488 = tensor.empty() : tensor<1x64xf32>
+    %9489 = "ttir.relu"(%9487, %9488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9490 = tensor.empty() : tensor<1x64xf32>
+    %9491 = "ttir.relu"(%9489, %9490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9492 = tensor.empty() : tensor<1x64xf32>
+    %9493 = "ttir.relu"(%9491, %9492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9494 = tensor.empty() : tensor<1x64xf32>
+    %9495 = "ttir.relu"(%9493, %9494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9496 = tensor.empty() : tensor<1x64xf32>
+    %9497 = "ttir.relu"(%9495, %9496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9498 = tensor.empty() : tensor<1x64xf32>
+    %9499 = "ttir.relu"(%9497, %9498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9500 = tensor.empty() : tensor<1x64xf32>
+    %9501 = "ttir.relu"(%9499, %9500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9502 = tensor.empty() : tensor<1x64xf32>
+    %9503 = "ttir.relu"(%9501, %9502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9504 = tensor.empty() : tensor<1x64xf32>
+    %9505 = "ttir.relu"(%9503, %9504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9506 = tensor.empty() : tensor<1x64xf32>
+    %9507 = "ttir.relu"(%9505, %9506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9508 = tensor.empty() : tensor<1x64xf32>
+    %9509 = "ttir.relu"(%9507, %9508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9510 = tensor.empty() : tensor<1x64xf32>
+    %9511 = "ttir.relu"(%9509, %9510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9512 = tensor.empty() : tensor<1x64xf32>
+    %9513 = "ttir.relu"(%9511, %9512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9514 = tensor.empty() : tensor<1x64xf32>
+    %9515 = "ttir.relu"(%9513, %9514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9516 = tensor.empty() : tensor<1x64xf32>
+    %9517 = "ttir.relu"(%9515, %9516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9518 = tensor.empty() : tensor<1x64xf32>
+    %9519 = "ttir.relu"(%9517, %9518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9520 = tensor.empty() : tensor<1x64xf32>
+    %9521 = "ttir.relu"(%9519, %9520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9522 = tensor.empty() : tensor<1x64xf32>
+    %9523 = "ttir.relu"(%9521, %9522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9524 = tensor.empty() : tensor<1x64xf32>
+    %9525 = "ttir.relu"(%9523, %9524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9526 = tensor.empty() : tensor<1x64xf32>
+    %9527 = "ttir.relu"(%9525, %9526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9528 = tensor.empty() : tensor<1x64xf32>
+    %9529 = "ttir.relu"(%9527, %9528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9530 = tensor.empty() : tensor<1x64xf32>
+    %9531 = "ttir.relu"(%9529, %9530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9532 = tensor.empty() : tensor<1x64xf32>
+    %9533 = "ttir.relu"(%9531, %9532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9534 = tensor.empty() : tensor<1x64xf32>
+    %9535 = "ttir.relu"(%9533, %9534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9536 = tensor.empty() : tensor<1x64xf32>
+    %9537 = "ttir.relu"(%9535, %9536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9538 = tensor.empty() : tensor<1x64xf32>
+    %9539 = "ttir.relu"(%9537, %9538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9540 = tensor.empty() : tensor<1x64xf32>
+    %9541 = "ttir.relu"(%9539, %9540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9542 = tensor.empty() : tensor<1x64xf32>
+    %9543 = "ttir.relu"(%9541, %9542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9544 = tensor.empty() : tensor<1x64xf32>
+    %9545 = "ttir.relu"(%9543, %9544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9546 = tensor.empty() : tensor<1x64xf32>
+    %9547 = "ttir.relu"(%9545, %9546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9548 = tensor.empty() : tensor<1x64xf32>
+    %9549 = "ttir.relu"(%9547, %9548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9550 = tensor.empty() : tensor<1x64xf32>
+    %9551 = "ttir.relu"(%9549, %9550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9552 = tensor.empty() : tensor<1x64xf32>
+    %9553 = "ttir.relu"(%9551, %9552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9554 = tensor.empty() : tensor<1x64xf32>
+    %9555 = "ttir.relu"(%9553, %9554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9556 = tensor.empty() : tensor<1x64xf32>
+    %9557 = "ttir.relu"(%9555, %9556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9558 = tensor.empty() : tensor<1x64xf32>
+    %9559 = "ttir.relu"(%9557, %9558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9560 = tensor.empty() : tensor<1x64xf32>
+    %9561 = "ttir.relu"(%9559, %9560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9562 = tensor.empty() : tensor<1x64xf32>
+    %9563 = "ttir.relu"(%9561, %9562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9564 = tensor.empty() : tensor<1x64xf32>
+    %9565 = "ttir.relu"(%9563, %9564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9566 = tensor.empty() : tensor<1x64xf32>
+    %9567 = "ttir.relu"(%9565, %9566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9568 = tensor.empty() : tensor<1x64xf32>
+    %9569 = "ttir.relu"(%9567, %9568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9570 = tensor.empty() : tensor<1x64xf32>
+    %9571 = "ttir.relu"(%9569, %9570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9572 = tensor.empty() : tensor<1x64xf32>
+    %9573 = "ttir.relu"(%9571, %9572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9574 = tensor.empty() : tensor<1x64xf32>
+    %9575 = "ttir.relu"(%9573, %9574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9576 = tensor.empty() : tensor<1x64xf32>
+    %9577 = "ttir.relu"(%9575, %9576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9578 = tensor.empty() : tensor<1x64xf32>
+    %9579 = "ttir.relu"(%9577, %9578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9580 = tensor.empty() : tensor<1x64xf32>
+    %9581 = "ttir.relu"(%9579, %9580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9582 = tensor.empty() : tensor<1x64xf32>
+    %9583 = "ttir.relu"(%9581, %9582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9584 = tensor.empty() : tensor<1x64xf32>
+    %9585 = "ttir.relu"(%9583, %9584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9586 = tensor.empty() : tensor<1x64xf32>
+    %9587 = "ttir.relu"(%9585, %9586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9588 = tensor.empty() : tensor<1x64xf32>
+    %9589 = "ttir.relu"(%9587, %9588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9590 = tensor.empty() : tensor<1x64xf32>
+    %9591 = "ttir.relu"(%9589, %9590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9592 = tensor.empty() : tensor<1x64xf32>
+    %9593 = "ttir.relu"(%9591, %9592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9594 = tensor.empty() : tensor<1x64xf32>
+    %9595 = "ttir.relu"(%9593, %9594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9596 = tensor.empty() : tensor<1x64xf32>
+    %9597 = "ttir.relu"(%9595, %9596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9598 = tensor.empty() : tensor<1x64xf32>
+    %9599 = "ttir.relu"(%9597, %9598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9600 = tensor.empty() : tensor<1x64xf32>
+    %9601 = "ttir.relu"(%9599, %9600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9602 = tensor.empty() : tensor<1x64xf32>
+    %9603 = "ttir.relu"(%9601, %9602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9604 = tensor.empty() : tensor<1x64xf32>
+    %9605 = "ttir.relu"(%9603, %9604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9606 = tensor.empty() : tensor<1x64xf32>
+    %9607 = "ttir.relu"(%9605, %9606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9608 = tensor.empty() : tensor<1x64xf32>
+    %9609 = "ttir.relu"(%9607, %9608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9610 = tensor.empty() : tensor<1x64xf32>
+    %9611 = "ttir.relu"(%9609, %9610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9612 = tensor.empty() : tensor<1x64xf32>
+    %9613 = "ttir.relu"(%9611, %9612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9614 = tensor.empty() : tensor<1x64xf32>
+    %9615 = "ttir.relu"(%9613, %9614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9616 = tensor.empty() : tensor<1x64xf32>
+    %9617 = "ttir.relu"(%9615, %9616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9618 = tensor.empty() : tensor<1x64xf32>
+    %9619 = "ttir.relu"(%9617, %9618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9620 = tensor.empty() : tensor<1x64xf32>
+    %9621 = "ttir.relu"(%9619, %9620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9622 = tensor.empty() : tensor<1x64xf32>
+    %9623 = "ttir.relu"(%9621, %9622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9624 = tensor.empty() : tensor<1x64xf32>
+    %9625 = "ttir.relu"(%9623, %9624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9626 = tensor.empty() : tensor<1x64xf32>
+    %9627 = "ttir.relu"(%9625, %9626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9628 = tensor.empty() : tensor<1x64xf32>
+    %9629 = "ttir.relu"(%9627, %9628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9630 = tensor.empty() : tensor<1x64xf32>
+    %9631 = "ttir.relu"(%9629, %9630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9632 = tensor.empty() : tensor<1x64xf32>
+    %9633 = "ttir.relu"(%9631, %9632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9634 = tensor.empty() : tensor<1x64xf32>
+    %9635 = "ttir.relu"(%9633, %9634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9636 = tensor.empty() : tensor<1x64xf32>
+    %9637 = "ttir.relu"(%9635, %9636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9638 = tensor.empty() : tensor<1x64xf32>
+    %9639 = "ttir.relu"(%9637, %9638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9640 = tensor.empty() : tensor<1x64xf32>
+    %9641 = "ttir.relu"(%9639, %9640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9642 = tensor.empty() : tensor<1x64xf32>
+    %9643 = "ttir.relu"(%9641, %9642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9644 = tensor.empty() : tensor<1x64xf32>
+    %9645 = "ttir.relu"(%9643, %9644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9646 = tensor.empty() : tensor<1x64xf32>
+    %9647 = "ttir.relu"(%9645, %9646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9648 = tensor.empty() : tensor<1x64xf32>
+    %9649 = "ttir.relu"(%9647, %9648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9650 = tensor.empty() : tensor<1x64xf32>
+    %9651 = "ttir.relu"(%9649, %9650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9652 = tensor.empty() : tensor<1x64xf32>
+    %9653 = "ttir.relu"(%9651, %9652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9654 = tensor.empty() : tensor<1x64xf32>
+    %9655 = "ttir.relu"(%9653, %9654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9656 = tensor.empty() : tensor<1x64xf32>
+    %9657 = "ttir.relu"(%9655, %9656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9658 = tensor.empty() : tensor<1x64xf32>
+    %9659 = "ttir.relu"(%9657, %9658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9660 = tensor.empty() : tensor<1x64xf32>
+    %9661 = "ttir.relu"(%9659, %9660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9662 = tensor.empty() : tensor<1x64xf32>
+    %9663 = "ttir.relu"(%9661, %9662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9664 = tensor.empty() : tensor<1x64xf32>
+    %9665 = "ttir.relu"(%9663, %9664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9666 = tensor.empty() : tensor<1x64xf32>
+    %9667 = "ttir.relu"(%9665, %9666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9668 = tensor.empty() : tensor<1x64xf32>
+    %9669 = "ttir.relu"(%9667, %9668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9670 = tensor.empty() : tensor<1x64xf32>
+    %9671 = "ttir.relu"(%9669, %9670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9672 = tensor.empty() : tensor<1x64xf32>
+    %9673 = "ttir.relu"(%9671, %9672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9674 = tensor.empty() : tensor<1x64xf32>
+    %9675 = "ttir.relu"(%9673, %9674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9676 = tensor.empty() : tensor<1x64xf32>
+    %9677 = "ttir.relu"(%9675, %9676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9678 = tensor.empty() : tensor<1x64xf32>
+    %9679 = "ttir.relu"(%9677, %9678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9680 = tensor.empty() : tensor<1x64xf32>
+    %9681 = "ttir.relu"(%9679, %9680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9682 = tensor.empty() : tensor<1x64xf32>
+    %9683 = "ttir.relu"(%9681, %9682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9684 = tensor.empty() : tensor<1x64xf32>
+    %9685 = "ttir.relu"(%9683, %9684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9686 = tensor.empty() : tensor<1x64xf32>
+    %9687 = "ttir.relu"(%9685, %9686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9688 = tensor.empty() : tensor<1x64xf32>
+    %9689 = "ttir.relu"(%9687, %9688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9690 = tensor.empty() : tensor<1x64xf32>
+    %9691 = "ttir.relu"(%9689, %9690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9692 = tensor.empty() : tensor<1x64xf32>
+    %9693 = "ttir.relu"(%9691, %9692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9694 = tensor.empty() : tensor<1x64xf32>
+    %9695 = "ttir.relu"(%9693, %9694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9696 = tensor.empty() : tensor<1x64xf32>
+    %9697 = "ttir.relu"(%9695, %9696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9698 = tensor.empty() : tensor<1x64xf32>
+    %9699 = "ttir.relu"(%9697, %9698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9700 = tensor.empty() : tensor<1x64xf32>
+    %9701 = "ttir.relu"(%9699, %9700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9702 = tensor.empty() : tensor<1x64xf32>
+    %9703 = "ttir.relu"(%9701, %9702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9704 = tensor.empty() : tensor<1x64xf32>
+    %9705 = "ttir.relu"(%9703, %9704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9706 = tensor.empty() : tensor<1x64xf32>
+    %9707 = "ttir.relu"(%9705, %9706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9708 = tensor.empty() : tensor<1x64xf32>
+    %9709 = "ttir.relu"(%9707, %9708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9710 = tensor.empty() : tensor<1x64xf32>
+    %9711 = "ttir.relu"(%9709, %9710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9712 = tensor.empty() : tensor<1x64xf32>
+    %9713 = "ttir.relu"(%9711, %9712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9714 = tensor.empty() : tensor<1x64xf32>
+    %9715 = "ttir.relu"(%9713, %9714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9716 = tensor.empty() : tensor<1x64xf32>
+    %9717 = "ttir.relu"(%9715, %9716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9718 = tensor.empty() : tensor<1x64xf32>
+    %9719 = "ttir.relu"(%9717, %9718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9720 = tensor.empty() : tensor<1x64xf32>
+    %9721 = "ttir.relu"(%9719, %9720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9722 = tensor.empty() : tensor<1x64xf32>
+    %9723 = "ttir.relu"(%9721, %9722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9724 = tensor.empty() : tensor<1x64xf32>
+    %9725 = "ttir.relu"(%9723, %9724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9726 = tensor.empty() : tensor<1x64xf32>
+    %9727 = "ttir.relu"(%9725, %9726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9728 = tensor.empty() : tensor<1x64xf32>
+    %9729 = "ttir.relu"(%9727, %9728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9730 = tensor.empty() : tensor<1x64xf32>
+    %9731 = "ttir.relu"(%9729, %9730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9732 = tensor.empty() : tensor<1x64xf32>
+    %9733 = "ttir.relu"(%9731, %9732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9734 = tensor.empty() : tensor<1x64xf32>
+    %9735 = "ttir.relu"(%9733, %9734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9736 = tensor.empty() : tensor<1x64xf32>
+    %9737 = "ttir.relu"(%9735, %9736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9738 = tensor.empty() : tensor<1x64xf32>
+    %9739 = "ttir.relu"(%9737, %9738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9740 = tensor.empty() : tensor<1x64xf32>
+    %9741 = "ttir.relu"(%9739, %9740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9742 = tensor.empty() : tensor<1x64xf32>
+    %9743 = "ttir.relu"(%9741, %9742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9744 = tensor.empty() : tensor<1x64xf32>
+    %9745 = "ttir.relu"(%9743, %9744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9746 = tensor.empty() : tensor<1x64xf32>
+    %9747 = "ttir.relu"(%9745, %9746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9748 = tensor.empty() : tensor<1x64xf32>
+    %9749 = "ttir.relu"(%9747, %9748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9750 = tensor.empty() : tensor<1x64xf32>
+    %9751 = "ttir.relu"(%9749, %9750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9752 = tensor.empty() : tensor<1x64xf32>
+    %9753 = "ttir.relu"(%9751, %9752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9754 = tensor.empty() : tensor<1x64xf32>
+    %9755 = "ttir.relu"(%9753, %9754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9756 = tensor.empty() : tensor<1x64xf32>
+    %9757 = "ttir.relu"(%9755, %9756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9758 = tensor.empty() : tensor<1x64xf32>
+    %9759 = "ttir.relu"(%9757, %9758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9760 = tensor.empty() : tensor<1x64xf32>
+    %9761 = "ttir.relu"(%9759, %9760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9762 = tensor.empty() : tensor<1x64xf32>
+    %9763 = "ttir.relu"(%9761, %9762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9764 = tensor.empty() : tensor<1x64xf32>
+    %9765 = "ttir.relu"(%9763, %9764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9766 = tensor.empty() : tensor<1x64xf32>
+    %9767 = "ttir.relu"(%9765, %9766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9768 = tensor.empty() : tensor<1x64xf32>
+    %9769 = "ttir.relu"(%9767, %9768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9770 = tensor.empty() : tensor<1x64xf32>
+    %9771 = "ttir.relu"(%9769, %9770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9772 = tensor.empty() : tensor<1x64xf32>
+    %9773 = "ttir.relu"(%9771, %9772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9774 = tensor.empty() : tensor<1x64xf32>
+    %9775 = "ttir.relu"(%9773, %9774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9776 = tensor.empty() : tensor<1x64xf32>
+    %9777 = "ttir.relu"(%9775, %9776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9778 = tensor.empty() : tensor<1x64xf32>
+    %9779 = "ttir.relu"(%9777, %9778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9780 = tensor.empty() : tensor<1x64xf32>
+    %9781 = "ttir.relu"(%9779, %9780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9782 = tensor.empty() : tensor<1x64xf32>
+    %9783 = "ttir.relu"(%9781, %9782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9784 = tensor.empty() : tensor<1x64xf32>
+    %9785 = "ttir.relu"(%9783, %9784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9786 = tensor.empty() : tensor<1x64xf32>
+    %9787 = "ttir.relu"(%9785, %9786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9788 = tensor.empty() : tensor<1x64xf32>
+    %9789 = "ttir.relu"(%9787, %9788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9790 = tensor.empty() : tensor<1x64xf32>
+    %9791 = "ttir.relu"(%9789, %9790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9792 = tensor.empty() : tensor<1x64xf32>
+    %9793 = "ttir.relu"(%9791, %9792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9794 = tensor.empty() : tensor<1x64xf32>
+    %9795 = "ttir.relu"(%9793, %9794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9796 = tensor.empty() : tensor<1x64xf32>
+    %9797 = "ttir.relu"(%9795, %9796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9798 = tensor.empty() : tensor<1x64xf32>
+    %9799 = "ttir.relu"(%9797, %9798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9800 = tensor.empty() : tensor<1x64xf32>
+    %9801 = "ttir.relu"(%9799, %9800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9802 = tensor.empty() : tensor<1x64xf32>
+    %9803 = "ttir.relu"(%9801, %9802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9804 = tensor.empty() : tensor<1x64xf32>
+    %9805 = "ttir.relu"(%9803, %9804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9806 = tensor.empty() : tensor<1x64xf32>
+    %9807 = "ttir.relu"(%9805, %9806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9808 = tensor.empty() : tensor<1x64xf32>
+    %9809 = "ttir.relu"(%9807, %9808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9810 = tensor.empty() : tensor<1x64xf32>
+    %9811 = "ttir.relu"(%9809, %9810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9812 = tensor.empty() : tensor<1x64xf32>
+    %9813 = "ttir.relu"(%9811, %9812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9814 = tensor.empty() : tensor<1x64xf32>
+    %9815 = "ttir.relu"(%9813, %9814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9816 = tensor.empty() : tensor<1x64xf32>
+    %9817 = "ttir.relu"(%9815, %9816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9818 = tensor.empty() : tensor<1x64xf32>
+    %9819 = "ttir.relu"(%9817, %9818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9820 = tensor.empty() : tensor<1x64xf32>
+    %9821 = "ttir.relu"(%9819, %9820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9822 = tensor.empty() : tensor<1x64xf32>
+    %9823 = "ttir.relu"(%9821, %9822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9824 = tensor.empty() : tensor<1x64xf32>
+    %9825 = "ttir.relu"(%9823, %9824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9826 = tensor.empty() : tensor<1x64xf32>
+    %9827 = "ttir.relu"(%9825, %9826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9828 = tensor.empty() : tensor<1x64xf32>
+    %9829 = "ttir.relu"(%9827, %9828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9830 = tensor.empty() : tensor<1x64xf32>
+    %9831 = "ttir.relu"(%9829, %9830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9832 = tensor.empty() : tensor<1x64xf32>
+    %9833 = "ttir.relu"(%9831, %9832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9834 = tensor.empty() : tensor<1x64xf32>
+    %9835 = "ttir.relu"(%9833, %9834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9836 = tensor.empty() : tensor<1x64xf32>
+    %9837 = "ttir.relu"(%9835, %9836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9838 = tensor.empty() : tensor<1x64xf32>
+    %9839 = "ttir.relu"(%9837, %9838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9840 = tensor.empty() : tensor<1x64xf32>
+    %9841 = "ttir.relu"(%9839, %9840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9842 = tensor.empty() : tensor<1x64xf32>
+    %9843 = "ttir.relu"(%9841, %9842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9844 = tensor.empty() : tensor<1x64xf32>
+    %9845 = "ttir.relu"(%9843, %9844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9846 = tensor.empty() : tensor<1x64xf32>
+    %9847 = "ttir.relu"(%9845, %9846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9848 = tensor.empty() : tensor<1x64xf32>
+    %9849 = "ttir.relu"(%9847, %9848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9850 = tensor.empty() : tensor<1x64xf32>
+    %9851 = "ttir.relu"(%9849, %9850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9852 = tensor.empty() : tensor<1x64xf32>
+    %9853 = "ttir.relu"(%9851, %9852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9854 = tensor.empty() : tensor<1x64xf32>
+    %9855 = "ttir.relu"(%9853, %9854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9856 = tensor.empty() : tensor<1x64xf32>
+    %9857 = "ttir.relu"(%9855, %9856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9858 = tensor.empty() : tensor<1x64xf32>
+    %9859 = "ttir.relu"(%9857, %9858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9860 = tensor.empty() : tensor<1x64xf32>
+    %9861 = "ttir.relu"(%9859, %9860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9862 = tensor.empty() : tensor<1x64xf32>
+    %9863 = "ttir.relu"(%9861, %9862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9864 = tensor.empty() : tensor<1x64xf32>
+    %9865 = "ttir.relu"(%9863, %9864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9866 = tensor.empty() : tensor<1x64xf32>
+    %9867 = "ttir.relu"(%9865, %9866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9868 = tensor.empty() : tensor<1x64xf32>
+    %9869 = "ttir.relu"(%9867, %9868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9870 = tensor.empty() : tensor<1x64xf32>
+    %9871 = "ttir.relu"(%9869, %9870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9872 = tensor.empty() : tensor<1x64xf32>
+    %9873 = "ttir.relu"(%9871, %9872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9874 = tensor.empty() : tensor<1x64xf32>
+    %9875 = "ttir.relu"(%9873, %9874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9876 = tensor.empty() : tensor<1x64xf32>
+    %9877 = "ttir.relu"(%9875, %9876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9878 = tensor.empty() : tensor<1x64xf32>
+    %9879 = "ttir.relu"(%9877, %9878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9880 = tensor.empty() : tensor<1x64xf32>
+    %9881 = "ttir.relu"(%9879, %9880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9882 = tensor.empty() : tensor<1x64xf32>
+    %9883 = "ttir.relu"(%9881, %9882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9884 = tensor.empty() : tensor<1x64xf32>
+    %9885 = "ttir.relu"(%9883, %9884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9886 = tensor.empty() : tensor<1x64xf32>
+    %9887 = "ttir.relu"(%9885, %9886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9888 = tensor.empty() : tensor<1x64xf32>
+    %9889 = "ttir.relu"(%9887, %9888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9890 = tensor.empty() : tensor<1x64xf32>
+    %9891 = "ttir.relu"(%9889, %9890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9892 = tensor.empty() : tensor<1x64xf32>
+    %9893 = "ttir.relu"(%9891, %9892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9894 = tensor.empty() : tensor<1x64xf32>
+    %9895 = "ttir.relu"(%9893, %9894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9896 = tensor.empty() : tensor<1x64xf32>
+    %9897 = "ttir.relu"(%9895, %9896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9898 = tensor.empty() : tensor<1x64xf32>
+    %9899 = "ttir.relu"(%9897, %9898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9900 = tensor.empty() : tensor<1x64xf32>
+    %9901 = "ttir.relu"(%9899, %9900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9902 = tensor.empty() : tensor<1x64xf32>
+    %9903 = "ttir.relu"(%9901, %9902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9904 = tensor.empty() : tensor<1x64xf32>
+    %9905 = "ttir.relu"(%9903, %9904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9906 = tensor.empty() : tensor<1x64xf32>
+    %9907 = "ttir.relu"(%9905, %9906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9908 = tensor.empty() : tensor<1x64xf32>
+    %9909 = "ttir.relu"(%9907, %9908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9910 = tensor.empty() : tensor<1x64xf32>
+    %9911 = "ttir.relu"(%9909, %9910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9912 = tensor.empty() : tensor<1x64xf32>
+    %9913 = "ttir.relu"(%9911, %9912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9914 = tensor.empty() : tensor<1x64xf32>
+    %9915 = "ttir.relu"(%9913, %9914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9916 = tensor.empty() : tensor<1x64xf32>
+    %9917 = "ttir.relu"(%9915, %9916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9918 = tensor.empty() : tensor<1x64xf32>
+    %9919 = "ttir.relu"(%9917, %9918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9920 = tensor.empty() : tensor<1x64xf32>
+    %9921 = "ttir.relu"(%9919, %9920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9922 = tensor.empty() : tensor<1x64xf32>
+    %9923 = "ttir.relu"(%9921, %9922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9924 = tensor.empty() : tensor<1x64xf32>
+    %9925 = "ttir.relu"(%9923, %9924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9926 = tensor.empty() : tensor<1x64xf32>
+    %9927 = "ttir.relu"(%9925, %9926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9928 = tensor.empty() : tensor<1x64xf32>
+    %9929 = "ttir.relu"(%9927, %9928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9930 = tensor.empty() : tensor<1x64xf32>
+    %9931 = "ttir.relu"(%9929, %9930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9932 = tensor.empty() : tensor<1x64xf32>
+    %9933 = "ttir.relu"(%9931, %9932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9934 = tensor.empty() : tensor<1x64xf32>
+    %9935 = "ttir.relu"(%9933, %9934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9936 = tensor.empty() : tensor<1x64xf32>
+    %9937 = "ttir.relu"(%9935, %9936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9938 = tensor.empty() : tensor<1x64xf32>
+    %9939 = "ttir.relu"(%9937, %9938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9940 = tensor.empty() : tensor<1x64xf32>
+    %9941 = "ttir.relu"(%9939, %9940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9942 = tensor.empty() : tensor<1x64xf32>
+    %9943 = "ttir.relu"(%9941, %9942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9944 = tensor.empty() : tensor<1x64xf32>
+    %9945 = "ttir.relu"(%9943, %9944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9946 = tensor.empty() : tensor<1x64xf32>
+    %9947 = "ttir.relu"(%9945, %9946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9948 = tensor.empty() : tensor<1x64xf32>
+    %9949 = "ttir.relu"(%9947, %9948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9950 = tensor.empty() : tensor<1x64xf32>
+    %9951 = "ttir.relu"(%9949, %9950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9952 = tensor.empty() : tensor<1x64xf32>
+    %9953 = "ttir.relu"(%9951, %9952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9954 = tensor.empty() : tensor<1x64xf32>
+    %9955 = "ttir.relu"(%9953, %9954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9956 = tensor.empty() : tensor<1x64xf32>
+    %9957 = "ttir.relu"(%9955, %9956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9958 = tensor.empty() : tensor<1x64xf32>
+    %9959 = "ttir.relu"(%9957, %9958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9960 = tensor.empty() : tensor<1x64xf32>
+    %9961 = "ttir.relu"(%9959, %9960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9962 = tensor.empty() : tensor<1x64xf32>
+    %9963 = "ttir.relu"(%9961, %9962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9964 = tensor.empty() : tensor<1x64xf32>
+    %9965 = "ttir.relu"(%9963, %9964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9966 = tensor.empty() : tensor<1x64xf32>
+    %9967 = "ttir.relu"(%9965, %9966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9968 = tensor.empty() : tensor<1x64xf32>
+    %9969 = "ttir.relu"(%9967, %9968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9970 = tensor.empty() : tensor<1x64xf32>
+    %9971 = "ttir.relu"(%9969, %9970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9972 = tensor.empty() : tensor<1x64xf32>
+    %9973 = "ttir.relu"(%9971, %9972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9974 = tensor.empty() : tensor<1x64xf32>
+    %9975 = "ttir.relu"(%9973, %9974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9976 = tensor.empty() : tensor<1x64xf32>
+    %9977 = "ttir.relu"(%9975, %9976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9978 = tensor.empty() : tensor<1x64xf32>
+    %9979 = "ttir.relu"(%9977, %9978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9980 = tensor.empty() : tensor<1x64xf32>
+    %9981 = "ttir.relu"(%9979, %9980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9982 = tensor.empty() : tensor<1x64xf32>
+    %9983 = "ttir.relu"(%9981, %9982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9984 = tensor.empty() : tensor<1x64xf32>
+    %9985 = "ttir.relu"(%9983, %9984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9986 = tensor.empty() : tensor<1x64xf32>
+    %9987 = "ttir.relu"(%9985, %9986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9988 = tensor.empty() : tensor<1x64xf32>
+    %9989 = "ttir.relu"(%9987, %9988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9990 = tensor.empty() : tensor<1x64xf32>
+    %9991 = "ttir.relu"(%9989, %9990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9992 = tensor.empty() : tensor<1x64xf32>
+    %9993 = "ttir.relu"(%9991, %9992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9994 = tensor.empty() : tensor<1x64xf32>
+    %9995 = "ttir.relu"(%9993, %9994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9996 = tensor.empty() : tensor<1x64xf32>
+    %9997 = "ttir.relu"(%9995, %9996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9998 = tensor.empty() : tensor<1x64xf32>
+    %9999 = "ttir.relu"(%9997, %9998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    return %9999 : tensor<1x64xf32>
+  }
+}
diff --git a/tools/explorer/test/models/test_1k_ops.mlir b/tools/explorer/test/models/test_1k_ops.mlir
new file mode 100644
index 000000000..fc29c77c0
--- /dev/null
+++ b/tools/explorer/test/models/test_1k_ops.mlir
@@ -0,0 +1,1005 @@
+module @Test10k attributes {} {
+  func.func @forward(%arg0: tensor<1x64xf32> {ttir.name = "input_1"}) -> (tensor<1x64xf32> {ttir.name = "TEST10k"}) {
+    %0 = tensor.empty() : tensor<1x64xf32>
+    %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %2 = tensor.empty() : tensor<1x64xf32>
+    %3 = "ttir.relu"(%1, %2) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %4 = tensor.empty() : tensor<1x64xf32>
+    %5 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %6 = tensor.empty() : tensor<1x64xf32>
+    %7 = "ttir.relu"(%5, %6) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %8 = tensor.empty() : tensor<1x64xf32>
+    %9 = "ttir.relu"(%7, %8) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %10 = tensor.empty() : tensor<1x64xf32>
+    %11 = "ttir.relu"(%9, %10) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %12 = tensor.empty() : tensor<1x64xf32>
+    %13 = "ttir.relu"(%11, %12) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %14 = tensor.empty() : tensor<1x64xf32>
+    %15 = "ttir.relu"(%13, %14) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %16 = tensor.empty() : tensor<1x64xf32>
+    %17 = "ttir.relu"(%15, %16) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %18 = tensor.empty() : tensor<1x64xf32>
+    %19 = "ttir.relu"(%17, %18) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %20 = tensor.empty() : tensor<1x64xf32>
+    %21 = "ttir.relu"(%19, %20) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %22 = tensor.empty() : tensor<1x64xf32>
+    %23 = "ttir.relu"(%21, %22) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %24 = tensor.empty() : tensor<1x64xf32>
+    %25 = "ttir.relu"(%23, %24) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %26 = tensor.empty() : tensor<1x64xf32>
+    %27 = "ttir.relu"(%25, %26) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %28 = tensor.empty() : tensor<1x64xf32>
+    %29 = "ttir.relu"(%27, %28) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %30 = tensor.empty() : tensor<1x64xf32>
+    %31 = "ttir.relu"(%29, %30) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %32 = tensor.empty() : tensor<1x64xf32>
+    %33 = "ttir.relu"(%31, %32) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %34 = tensor.empty() : tensor<1x64xf32>
+    %35 = "ttir.relu"(%33, %34) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %36 = tensor.empty() : tensor<1x64xf32>
+    %37 = "ttir.relu"(%35, %36) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %38 = tensor.empty() : tensor<1x64xf32>
+    %39 = "ttir.relu"(%37, %38) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %40 = tensor.empty() : tensor<1x64xf32>
+    %41 = "ttir.relu"(%39, %40) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %42 = tensor.empty() : tensor<1x64xf32>
+    %43 = "ttir.relu"(%41, %42) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %44 = tensor.empty() : tensor<1x64xf32>
+    %45 = "ttir.relu"(%43, %44) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %46 = tensor.empty() : tensor<1x64xf32>
+    %47 = "ttir.relu"(%45, %46) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %48 = tensor.empty() : tensor<1x64xf32>
+    %49 = "ttir.relu"(%47, %48) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %50 = tensor.empty() : tensor<1x64xf32>
+    %51 = "ttir.relu"(%49, %50) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %52 = tensor.empty() : tensor<1x64xf32>
+    %53 = "ttir.relu"(%51, %52) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %54 = tensor.empty() : tensor<1x64xf32>
+    %55 = "ttir.relu"(%53, %54) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %56 = tensor.empty() : tensor<1x64xf32>
+    %57 = "ttir.relu"(%55, %56) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %58 = tensor.empty() : tensor<1x64xf32>
+    %59 = "ttir.relu"(%57, %58) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %60 = tensor.empty() : tensor<1x64xf32>
+    %61 = "ttir.relu"(%59, %60) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %62 = tensor.empty() : tensor<1x64xf32>
+    %63 = "ttir.relu"(%61, %62) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %64 = tensor.empty() : tensor<1x64xf32>
+    %65 = "ttir.relu"(%63, %64) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %66 = tensor.empty() : tensor<1x64xf32>
+    %67 = "ttir.relu"(%65, %66) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %68 = tensor.empty() : tensor<1x64xf32>
+    %69 = "ttir.relu"(%67, %68) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %70 = tensor.empty() : tensor<1x64xf32>
+    %71 = "ttir.relu"(%69, %70) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %72 = tensor.empty() : tensor<1x64xf32>
+    %73 = "ttir.relu"(%71, %72) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %74 = tensor.empty() : tensor<1x64xf32>
+    %75 = "ttir.relu"(%73, %74) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %76 = tensor.empty() : tensor<1x64xf32>
+    %77 = "ttir.relu"(%75, %76) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %78 = tensor.empty() : tensor<1x64xf32>
+    %79 = "ttir.relu"(%77, %78) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %80 = tensor.empty() : tensor<1x64xf32>
+    %81 = "ttir.relu"(%79, %80) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %82 = tensor.empty() : tensor<1x64xf32>
+    %83 = "ttir.relu"(%81, %82) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %84 = tensor.empty() : tensor<1x64xf32>
+    %85 = "ttir.relu"(%83, %84) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %86 = tensor.empty() : tensor<1x64xf32>
+    %87 = "ttir.relu"(%85, %86) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %88 = tensor.empty() : tensor<1x64xf32>
+    %89 = "ttir.relu"(%87, %88) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %90 = tensor.empty() : tensor<1x64xf32>
+    %91 = "ttir.relu"(%89, %90) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %92 = tensor.empty() : tensor<1x64xf32>
+    %93 = "ttir.relu"(%91, %92) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %94 = tensor.empty() : tensor<1x64xf32>
+    %95 = "ttir.relu"(%93, %94) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %96 = tensor.empty() : tensor<1x64xf32>
+    %97 = "ttir.relu"(%95, %96) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %98 = tensor.empty() : tensor<1x64xf32>
+    %99 = "ttir.relu"(%97, %98) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %100 = tensor.empty() : tensor<1x64xf32>
+    %101 = "ttir.relu"(%99, %100) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %102 = tensor.empty() : tensor<1x64xf32>
+    %103 = "ttir.relu"(%101, %102) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %104 = tensor.empty() : tensor<1x64xf32>
+    %105 = "ttir.relu"(%103, %104) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %106 = tensor.empty() : tensor<1x64xf32>
+    %107 = "ttir.relu"(%105, %106) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %108 = tensor.empty() : tensor<1x64xf32>
+    %109 = "ttir.relu"(%107, %108) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %110 = tensor.empty() : tensor<1x64xf32>
+    %111 = "ttir.relu"(%109, %110) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %112 = tensor.empty() : tensor<1x64xf32>
+    %113 = "ttir.relu"(%111, %112) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %114 = tensor.empty() : tensor<1x64xf32>
+    %115 = "ttir.relu"(%113, %114) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %116 = tensor.empty() : tensor<1x64xf32>
+    %117 = "ttir.relu"(%115, %116) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %118 = tensor.empty() : tensor<1x64xf32>
+    %119 = "ttir.relu"(%117, %118) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %120 = tensor.empty() : tensor<1x64xf32>
+    %121 = "ttir.relu"(%119, %120) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %122 = tensor.empty() : tensor<1x64xf32>
+    %123 = "ttir.relu"(%121, %122) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %124 = tensor.empty() : tensor<1x64xf32>
+    %125 = "ttir.relu"(%123, %124) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %126 = tensor.empty() : tensor<1x64xf32>
+    %127 = "ttir.relu"(%125, %126) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %128 = tensor.empty() : tensor<1x64xf32>
+    %129 = "ttir.relu"(%127, %128) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %130 = tensor.empty() : tensor<1x64xf32>
+    %131 = "ttir.relu"(%129, %130) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %132 = tensor.empty() : tensor<1x64xf32>
+    %133 = "ttir.relu"(%131, %132) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %134 = tensor.empty() : tensor<1x64xf32>
+    %135 = "ttir.relu"(%133, %134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %136 = tensor.empty() : tensor<1x64xf32>
+    %137 = "ttir.relu"(%135, %136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %138 = tensor.empty() : tensor<1x64xf32>
+    %139 = "ttir.relu"(%137, %138) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %140 = tensor.empty() : tensor<1x64xf32>
+    %141 = "ttir.relu"(%139, %140) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %142 = tensor.empty() : tensor<1x64xf32>
+    %143 = "ttir.relu"(%141, %142) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %144 = tensor.empty() : tensor<1x64xf32>
+    %145 = "ttir.relu"(%143, %144) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %146 = tensor.empty() : tensor<1x64xf32>
+    %147 = "ttir.relu"(%145, %146) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %148 = tensor.empty() : tensor<1x64xf32>
+    %149 = "ttir.relu"(%147, %148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %150 = tensor.empty() : tensor<1x64xf32>
+    %151 = "ttir.relu"(%149, %150) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %152 = tensor.empty() : tensor<1x64xf32>
+    %153 = "ttir.relu"(%151, %152) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %154 = tensor.empty() : tensor<1x64xf32>
+    %155 = "ttir.relu"(%153, %154) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %156 = tensor.empty() : tensor<1x64xf32>
+    %157 = "ttir.relu"(%155, %156) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %158 = tensor.empty() : tensor<1x64xf32>
+    %159 = "ttir.relu"(%157, %158) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %160 = tensor.empty() : tensor<1x64xf32>
+    %161 = "ttir.relu"(%159, %160) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %162 = tensor.empty() : tensor<1x64xf32>
+    %163 = "ttir.relu"(%161, %162) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %164 = tensor.empty() : tensor<1x64xf32>
+    %165 = "ttir.relu"(%163, %164) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %166 = tensor.empty() : tensor<1x64xf32>
+    %167 = "ttir.relu"(%165, %166) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %168 = tensor.empty() : tensor<1x64xf32>
+    %169 = "ttir.relu"(%167, %168) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %170 = tensor.empty() : tensor<1x64xf32>
+    %171 = "ttir.relu"(%169, %170) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %172 = tensor.empty() : tensor<1x64xf32>
+    %173 = "ttir.relu"(%171, %172) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %174 = tensor.empty() : tensor<1x64xf32>
+    %175 = "ttir.relu"(%173, %174) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %176 = tensor.empty() : tensor<1x64xf32>
+    %177 = "ttir.relu"(%175, %176) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %178 = tensor.empty() : tensor<1x64xf32>
+    %179 = "ttir.relu"(%177, %178) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %180 = tensor.empty() : tensor<1x64xf32>
+    %181 = "ttir.relu"(%179, %180) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %182 = tensor.empty() : tensor<1x64xf32>
+    %183 = "ttir.relu"(%181, %182) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %184 = tensor.empty() : tensor<1x64xf32>
+    %185 = "ttir.relu"(%183, %184) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %186 = tensor.empty() : tensor<1x64xf32>
+    %187 = "ttir.relu"(%185, %186) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %188 = tensor.empty() : tensor<1x64xf32>
+    %189 = "ttir.relu"(%187, %188) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %190 = tensor.empty() : tensor<1x64xf32>
+    %191 = "ttir.relu"(%189, %190) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %192 = tensor.empty() : tensor<1x64xf32>
+    %193 = "ttir.relu"(%191, %192) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %194 = tensor.empty() : tensor<1x64xf32>
+    %195 = "ttir.relu"(%193, %194) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %196 = tensor.empty() : tensor<1x64xf32>
+    %197 = "ttir.relu"(%195, %196) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %198 = tensor.empty() : tensor<1x64xf32>
+    %199 = "ttir.relu"(%197, %198) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %200 = tensor.empty() : tensor<1x64xf32>
+    %201 = "ttir.relu"(%199, %200) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %202 = tensor.empty() : tensor<1x64xf32>
+    %203 = "ttir.relu"(%201, %202) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %204 = tensor.empty() : tensor<1x64xf32>
+    %205 = "ttir.relu"(%203, %204) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %206 = tensor.empty() : tensor<1x64xf32>
+    %207 = "ttir.relu"(%205, %206) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %208 = tensor.empty() : tensor<1x64xf32>
+    %209 = "ttir.relu"(%207, %208) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %210 = tensor.empty() : tensor<1x64xf32>
+    %211 = "ttir.relu"(%209, %210) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %212 = tensor.empty() : tensor<1x64xf32>
+    %213 = "ttir.relu"(%211, %212) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %214 = tensor.empty() : tensor<1x64xf32>
+    %215 = "ttir.relu"(%213, %214) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %216 = tensor.empty() : tensor<1x64xf32>
+    %217 = "ttir.relu"(%215, %216) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %218 = tensor.empty() : tensor<1x64xf32>
+    %219 = "ttir.relu"(%217, %218) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %220 = tensor.empty() : tensor<1x64xf32>
+    %221 = "ttir.relu"(%219, %220) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %222 = tensor.empty() : tensor<1x64xf32>
+    %223 = "ttir.relu"(%221, %222) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %224 = tensor.empty() : tensor<1x64xf32>
+    %225 = "ttir.relu"(%223, %224) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %226 = tensor.empty() : tensor<1x64xf32>
+    %227 = "ttir.relu"(%225, %226) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %228 = tensor.empty() : tensor<1x64xf32>
+    %229 = "ttir.relu"(%227, %228) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %230 = tensor.empty() : tensor<1x64xf32>
+    %231 = "ttir.relu"(%229, %230) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %232 = tensor.empty() : tensor<1x64xf32>
+    %233 = "ttir.relu"(%231, %232) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %234 = tensor.empty() : tensor<1x64xf32>
+    %235 = "ttir.relu"(%233, %234) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %236 = tensor.empty() : tensor<1x64xf32>
+    %237 = "ttir.relu"(%235, %236) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %238 = tensor.empty() : tensor<1x64xf32>
+    %239 = "ttir.relu"(%237, %238) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %240 = tensor.empty() : tensor<1x64xf32>
+    %241 = "ttir.relu"(%239, %240) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %242 = tensor.empty() : tensor<1x64xf32>
+    %243 = "ttir.relu"(%241, %242) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %244 = tensor.empty() : tensor<1x64xf32>
+    %245 = "ttir.relu"(%243, %244) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %246 = tensor.empty() : tensor<1x64xf32>
+    %247 = "ttir.relu"(%245, %246) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %248 = tensor.empty() : tensor<1x64xf32>
+    %249 = "ttir.relu"(%247, %248) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %250 = tensor.empty() : tensor<1x64xf32>
+    %251 = "ttir.relu"(%249, %250) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %252 = tensor.empty() : tensor<1x64xf32>
+    %253 = "ttir.relu"(%251, %252) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %254 = tensor.empty() : tensor<1x64xf32>
+    %255 = "ttir.relu"(%253, %254) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %256 = tensor.empty() : tensor<1x64xf32>
+    %257 = "ttir.relu"(%255, %256) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %258 = tensor.empty() : tensor<1x64xf32>
+    %259 = "ttir.relu"(%257, %258) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %260 = tensor.empty() : tensor<1x64xf32>
+    %261 = "ttir.relu"(%259, %260) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %262 = tensor.empty() : tensor<1x64xf32>
+    %263 = "ttir.relu"(%261, %262) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %264 = tensor.empty() : tensor<1x64xf32>
+    %265 = "ttir.relu"(%263, %264) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %266 = tensor.empty() : tensor<1x64xf32>
+    %267 = "ttir.relu"(%265, %266) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %268 = tensor.empty() : tensor<1x64xf32>
+    %269 = "ttir.relu"(%267, %268) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %270 = tensor.empty() : tensor<1x64xf32>
+    %271 = "ttir.relu"(%269, %270) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %272 = tensor.empty() : tensor<1x64xf32>
+    %273 = "ttir.relu"(%271, %272) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %274 = tensor.empty() : tensor<1x64xf32>
+    %275 = "ttir.relu"(%273, %274) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %276 = tensor.empty() : tensor<1x64xf32>
+    %277 = "ttir.relu"(%275, %276) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %278 = tensor.empty() : tensor<1x64xf32>
+    %279 = "ttir.relu"(%277, %278) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %280 = tensor.empty() : tensor<1x64xf32>
+    %281 = "ttir.relu"(%279, %280) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %282 = tensor.empty() : tensor<1x64xf32>
+    %283 = "ttir.relu"(%281, %282) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %284 = tensor.empty() : tensor<1x64xf32>
+    %285 = "ttir.relu"(%283, %284) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %286 = tensor.empty() : tensor<1x64xf32>
+    %287 = "ttir.relu"(%285, %286) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %288 = tensor.empty() : tensor<1x64xf32>
+    %289 = "ttir.relu"(%287, %288) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %290 = tensor.empty() : tensor<1x64xf32>
+    %291 = "ttir.relu"(%289, %290) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %292 = tensor.empty() : tensor<1x64xf32>
+    %293 = "ttir.relu"(%291, %292) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %294 = tensor.empty() : tensor<1x64xf32>
+    %295 = "ttir.relu"(%293, %294) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %296 = tensor.empty() : tensor<1x64xf32>
+    %297 = "ttir.relu"(%295, %296) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %298 = tensor.empty() : tensor<1x64xf32>
+    %299 = "ttir.relu"(%297, %298) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %300 = tensor.empty() : tensor<1x64xf32>
+    %301 = "ttir.relu"(%299, %300) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %302 = tensor.empty() : tensor<1x64xf32>
+    %303 = "ttir.relu"(%301, %302) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %304 = tensor.empty() : tensor<1x64xf32>
+    %305 = "ttir.relu"(%303, %304) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %306 = tensor.empty() : tensor<1x64xf32>
+    %307 = "ttir.relu"(%305, %306) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %308 = tensor.empty() : tensor<1x64xf32>
+    %309 = "ttir.relu"(%307, %308) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %310 = tensor.empty() : tensor<1x64xf32>
+    %311 = "ttir.relu"(%309, %310) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %312 = tensor.empty() : tensor<1x64xf32>
+    %313 = "ttir.relu"(%311, %312) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %314 = tensor.empty() : tensor<1x64xf32>
+    %315 = "ttir.relu"(%313, %314) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %316 = tensor.empty() : tensor<1x64xf32>
+    %317 = "ttir.relu"(%315, %316) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %318 = tensor.empty() : tensor<1x64xf32>
+    %319 = "ttir.relu"(%317, %318) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %320 = tensor.empty() : tensor<1x64xf32>
+    %321 = "ttir.relu"(%319, %320) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %322 = tensor.empty() : tensor<1x64xf32>
+    %323 = "ttir.relu"(%321, %322) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %324 = tensor.empty() : tensor<1x64xf32>
+    %325 = "ttir.relu"(%323, %324) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %326 = tensor.empty() : tensor<1x64xf32>
+    %327 = "ttir.relu"(%325, %326) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %328 = tensor.empty() : tensor<1x64xf32>
+    %329 = "ttir.relu"(%327, %328) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %330 = tensor.empty() : tensor<1x64xf32>
+    %331 = "ttir.relu"(%329, %330) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %332 = tensor.empty() : tensor<1x64xf32>
+    %333 = "ttir.relu"(%331, %332) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %334 = tensor.empty() : tensor<1x64xf32>
+    %335 = "ttir.relu"(%333, %334) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %336 = tensor.empty() : tensor<1x64xf32>
+    %337 = "ttir.relu"(%335, %336) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %338 = tensor.empty() : tensor<1x64xf32>
+    %339 = "ttir.relu"(%337, %338) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %340 = tensor.empty() : tensor<1x64xf32>
+    %341 = "ttir.relu"(%339, %340) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %342 = tensor.empty() : tensor<1x64xf32>
+    %343 = "ttir.relu"(%341, %342) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %344 = tensor.empty() : tensor<1x64xf32>
+    %345 = "ttir.relu"(%343, %344) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %346 = tensor.empty() : tensor<1x64xf32>
+    %347 = "ttir.relu"(%345, %346) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %348 = tensor.empty() : tensor<1x64xf32>
+    %349 = "ttir.relu"(%347, %348) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %350 = tensor.empty() : tensor<1x64xf32>
+    %351 = "ttir.relu"(%349, %350) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %352 = tensor.empty() : tensor<1x64xf32>
+    %353 = "ttir.relu"(%351, %352) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %354 = tensor.empty() : tensor<1x64xf32>
+    %355 = "ttir.relu"(%353, %354) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %356 = tensor.empty() : tensor<1x64xf32>
+    %357 = "ttir.relu"(%355, %356) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %358 = tensor.empty() : tensor<1x64xf32>
+    %359 = "ttir.relu"(%357, %358) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %360 = tensor.empty() : tensor<1x64xf32>
+    %361 = "ttir.relu"(%359, %360) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %362 = tensor.empty() : tensor<1x64xf32>
+    %363 = "ttir.relu"(%361, %362) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %364 = tensor.empty() : tensor<1x64xf32>
+    %365 = "ttir.relu"(%363, %364) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %366 = tensor.empty() : tensor<1x64xf32>
+    %367 = "ttir.relu"(%365, %366) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %368 = tensor.empty() : tensor<1x64xf32>
+    %369 = "ttir.relu"(%367, %368) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %370 = tensor.empty() : tensor<1x64xf32>
+    %371 = "ttir.relu"(%369, %370) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %372 = tensor.empty() : tensor<1x64xf32>
+    %373 = "ttir.relu"(%371, %372) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %374 = tensor.empty() : tensor<1x64xf32>
+    %375 = "ttir.relu"(%373, %374) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %376 = tensor.empty() : tensor<1x64xf32>
+    %377 = "ttir.relu"(%375, %376) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %378 = tensor.empty() : tensor<1x64xf32>
+    %379 = "ttir.relu"(%377, %378) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %380 = tensor.empty() : tensor<1x64xf32>
+    %381 = "ttir.relu"(%379, %380) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %382 = tensor.empty() : tensor<1x64xf32>
+    %383 = "ttir.relu"(%381, %382) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %384 = tensor.empty() : tensor<1x64xf32>
+    %385 = "ttir.relu"(%383, %384) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %386 = tensor.empty() : tensor<1x64xf32>
+    %387 = "ttir.relu"(%385, %386) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %388 = tensor.empty() : tensor<1x64xf32>
+    %389 = "ttir.relu"(%387, %388) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %390 = tensor.empty() : tensor<1x64xf32>
+    %391 = "ttir.relu"(%389, %390) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %392 = tensor.empty() : tensor<1x64xf32>
+    %393 = "ttir.relu"(%391, %392) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %394 = tensor.empty() : tensor<1x64xf32>
+    %395 = "ttir.relu"(%393, %394) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %396 = tensor.empty() : tensor<1x64xf32>
+    %397 = "ttir.relu"(%395, %396) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %398 = tensor.empty() : tensor<1x64xf32>
+    %399 = "ttir.relu"(%397, %398) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %400 = tensor.empty() : tensor<1x64xf32>
+    %401 = "ttir.relu"(%399, %400) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %402 = tensor.empty() : tensor<1x64xf32>
+    %403 = "ttir.relu"(%401, %402) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %404 = tensor.empty() : tensor<1x64xf32>
+    %405 = "ttir.relu"(%403, %404) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %406 = tensor.empty() : tensor<1x64xf32>
+    %407 = "ttir.relu"(%405, %406) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %408 = tensor.empty() : tensor<1x64xf32>
+    %409 = "ttir.relu"(%407, %408) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %410 = tensor.empty() : tensor<1x64xf32>
+    %411 = "ttir.relu"(%409, %410) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %412 = tensor.empty() : tensor<1x64xf32>
+    %413 = "ttir.relu"(%411, %412) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %414 = tensor.empty() : tensor<1x64xf32>
+    %415 = "ttir.relu"(%413, %414) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %416 = tensor.empty() : tensor<1x64xf32>
+    %417 = "ttir.relu"(%415, %416) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %418 = tensor.empty() : tensor<1x64xf32>
+    %419 = "ttir.relu"(%417, %418) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %420 = tensor.empty() : tensor<1x64xf32>
+    %421 = "ttir.relu"(%419, %420) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %422 = tensor.empty() : tensor<1x64xf32>
+    %423 = "ttir.relu"(%421, %422) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %424 = tensor.empty() : tensor<1x64xf32>
+    %425 = "ttir.relu"(%423, %424) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %426 = tensor.empty() : tensor<1x64xf32>
+    %427 = "ttir.relu"(%425, %426) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %428 = tensor.empty() : tensor<1x64xf32>
+    %429 = "ttir.relu"(%427, %428) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %430 = tensor.empty() : tensor<1x64xf32>
+    %431 = "ttir.relu"(%429, %430) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %432 = tensor.empty() : tensor<1x64xf32>
+    %433 = "ttir.relu"(%431, %432) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %434 = tensor.empty() : tensor<1x64xf32>
+    %435 = "ttir.relu"(%433, %434) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %436 = tensor.empty() : tensor<1x64xf32>
+    %437 = "ttir.relu"(%435, %436) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %438 = tensor.empty() : tensor<1x64xf32>
+    %439 = "ttir.relu"(%437, %438) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %440 = tensor.empty() : tensor<1x64xf32>
+    %441 = "ttir.relu"(%439, %440) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %442 = tensor.empty() : tensor<1x64xf32>
+    %443 = "ttir.relu"(%441, %442) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %444 = tensor.empty() : tensor<1x64xf32>
+    %445 = "ttir.relu"(%443, %444) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %446 = tensor.empty() : tensor<1x64xf32>
+    %447 = "ttir.relu"(%445, %446) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %448 = tensor.empty() : tensor<1x64xf32>
+    %449 = "ttir.relu"(%447, %448) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %450 = tensor.empty() : tensor<1x64xf32>
+    %451 = "ttir.relu"(%449, %450) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %452 = tensor.empty() : tensor<1x64xf32>
+    %453 = "ttir.relu"(%451, %452) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %454 = tensor.empty() : tensor<1x64xf32>
+    %455 = "ttir.relu"(%453, %454) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %456 = tensor.empty() : tensor<1x64xf32>
+    %457 = "ttir.relu"(%455, %456) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %458 = tensor.empty() : tensor<1x64xf32>
+    %459 = "ttir.relu"(%457, %458) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %460 = tensor.empty() : tensor<1x64xf32>
+    %461 = "ttir.relu"(%459, %460) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %462 = tensor.empty() : tensor<1x64xf32>
+    %463 = "ttir.relu"(%461, %462) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %464 = tensor.empty() : tensor<1x64xf32>
+    %465 = "ttir.relu"(%463, %464) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %466 = tensor.empty() : tensor<1x64xf32>
+    %467 = "ttir.relu"(%465, %466) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %468 = tensor.empty() : tensor<1x64xf32>
+    %469 = "ttir.relu"(%467, %468) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %470 = tensor.empty() : tensor<1x64xf32>
+    %471 = "ttir.relu"(%469, %470) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %472 = tensor.empty() : tensor<1x64xf32>
+    %473 = "ttir.relu"(%471, %472) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %474 = tensor.empty() : tensor<1x64xf32>
+    %475 = "ttir.relu"(%473, %474) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %476 = tensor.empty() : tensor<1x64xf32>
+    %477 = "ttir.relu"(%475, %476) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %478 = tensor.empty() : tensor<1x64xf32>
+    %479 = "ttir.relu"(%477, %478) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %480 = tensor.empty() : tensor<1x64xf32>
+    %481 = "ttir.relu"(%479, %480) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %482 = tensor.empty() : tensor<1x64xf32>
+    %483 = "ttir.relu"(%481, %482) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %484 = tensor.empty() : tensor<1x64xf32>
+    %485 = "ttir.relu"(%483, %484) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %486 = tensor.empty() : tensor<1x64xf32>
+    %487 = "ttir.relu"(%485, %486) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %488 = tensor.empty() : tensor<1x64xf32>
+    %489 = "ttir.relu"(%487, %488) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %490 = tensor.empty() : tensor<1x64xf32>
+    %491 = "ttir.relu"(%489, %490) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %492 = tensor.empty() : tensor<1x64xf32>
+    %493 = "ttir.relu"(%491, %492) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %494 = tensor.empty() : tensor<1x64xf32>
+    %495 = "ttir.relu"(%493, %494) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %496 = tensor.empty() : tensor<1x64xf32>
+    %497 = "ttir.relu"(%495, %496) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %498 = tensor.empty() : tensor<1x64xf32>
+    %499 = "ttir.relu"(%497, %498) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %500 = tensor.empty() : tensor<1x64xf32>
+    %501 = "ttir.relu"(%499, %500) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %502 = tensor.empty() : tensor<1x64xf32>
+    %503 = "ttir.relu"(%501, %502) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %504 = tensor.empty() : tensor<1x64xf32>
+    %505 = "ttir.relu"(%503, %504) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %506 = tensor.empty() : tensor<1x64xf32>
+    %507 = "ttir.relu"(%505, %506) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %508 = tensor.empty() : tensor<1x64xf32>
+    %509 = "ttir.relu"(%507, %508) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %510 = tensor.empty() : tensor<1x64xf32>
+    %511 = "ttir.relu"(%509, %510) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %512 = tensor.empty() : tensor<1x64xf32>
+    %513 = "ttir.relu"(%511, %512) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %514 = tensor.empty() : tensor<1x64xf32>
+    %515 = "ttir.relu"(%513, %514) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %516 = tensor.empty() : tensor<1x64xf32>
+    %517 = "ttir.relu"(%515, %516) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %518 = tensor.empty() : tensor<1x64xf32>
+    %519 = "ttir.relu"(%517, %518) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %520 = tensor.empty() : tensor<1x64xf32>
+    %521 = "ttir.relu"(%519, %520) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %522 = tensor.empty() : tensor<1x64xf32>
+    %523 = "ttir.relu"(%521, %522) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %524 = tensor.empty() : tensor<1x64xf32>
+    %525 = "ttir.relu"(%523, %524) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %526 = tensor.empty() : tensor<1x64xf32>
+    %527 = "ttir.relu"(%525, %526) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %528 = tensor.empty() : tensor<1x64xf32>
+    %529 = "ttir.relu"(%527, %528) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %530 = tensor.empty() : tensor<1x64xf32>
+    %531 = "ttir.relu"(%529, %530) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %532 = tensor.empty() : tensor<1x64xf32>
+    %533 = "ttir.relu"(%531, %532) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %534 = tensor.empty() : tensor<1x64xf32>
+    %535 = "ttir.relu"(%533, %534) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %536 = tensor.empty() : tensor<1x64xf32>
+    %537 = "ttir.relu"(%535, %536) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %538 = tensor.empty() : tensor<1x64xf32>
+    %539 = "ttir.relu"(%537, %538) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %540 = tensor.empty() : tensor<1x64xf32>
+    %541 = "ttir.relu"(%539, %540) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %542 = tensor.empty() : tensor<1x64xf32>
+    %543 = "ttir.relu"(%541, %542) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %544 = tensor.empty() : tensor<1x64xf32>
+    %545 = "ttir.relu"(%543, %544) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %546 = tensor.empty() : tensor<1x64xf32>
+    %547 = "ttir.relu"(%545, %546) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %548 = tensor.empty() : tensor<1x64xf32>
+    %549 = "ttir.relu"(%547, %548) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %550 = tensor.empty() : tensor<1x64xf32>
+    %551 = "ttir.relu"(%549, %550) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %552 = tensor.empty() : tensor<1x64xf32>
+    %553 = "ttir.relu"(%551, %552) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %554 = tensor.empty() : tensor<1x64xf32>
+    %555 = "ttir.relu"(%553, %554) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %556 = tensor.empty() : tensor<1x64xf32>
+    %557 = "ttir.relu"(%555, %556) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %558 = tensor.empty() : tensor<1x64xf32>
+    %559 = "ttir.relu"(%557, %558) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %560 = tensor.empty() : tensor<1x64xf32>
+    %561 = "ttir.relu"(%559, %560) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %562 = tensor.empty() : tensor<1x64xf32>
+    %563 = "ttir.relu"(%561, %562) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %564 = tensor.empty() : tensor<1x64xf32>
+    %565 = "ttir.relu"(%563, %564) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %566 = tensor.empty() : tensor<1x64xf32>
+    %567 = "ttir.relu"(%565, %566) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %568 = tensor.empty() : tensor<1x64xf32>
+    %569 = "ttir.relu"(%567, %568) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %570 = tensor.empty() : tensor<1x64xf32>
+    %571 = "ttir.relu"(%569, %570) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %572 = tensor.empty() : tensor<1x64xf32>
+    %573 = "ttir.relu"(%571, %572) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %574 = tensor.empty() : tensor<1x64xf32>
+    %575 = "ttir.relu"(%573, %574) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %576 = tensor.empty() : tensor<1x64xf32>
+    %577 = "ttir.relu"(%575, %576) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %578 = tensor.empty() : tensor<1x64xf32>
+    %579 = "ttir.relu"(%577, %578) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %580 = tensor.empty() : tensor<1x64xf32>
+    %581 = "ttir.relu"(%579, %580) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %582 = tensor.empty() : tensor<1x64xf32>
+    %583 = "ttir.relu"(%581, %582) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %584 = tensor.empty() : tensor<1x64xf32>
+    %585 = "ttir.relu"(%583, %584) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %586 = tensor.empty() : tensor<1x64xf32>
+    %587 = "ttir.relu"(%585, %586) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %588 = tensor.empty() : tensor<1x64xf32>
+    %589 = "ttir.relu"(%587, %588) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %590 = tensor.empty() : tensor<1x64xf32>
+    %591 = "ttir.relu"(%589, %590) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %592 = tensor.empty() : tensor<1x64xf32>
+    %593 = "ttir.relu"(%591, %592) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %594 = tensor.empty() : tensor<1x64xf32>
+    %595 = "ttir.relu"(%593, %594) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %596 = tensor.empty() : tensor<1x64xf32>
+    %597 = "ttir.relu"(%595, %596) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %598 = tensor.empty() : tensor<1x64xf32>
+    %599 = "ttir.relu"(%597, %598) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %600 = tensor.empty() : tensor<1x64xf32>
+    %601 = "ttir.relu"(%599, %600) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %602 = tensor.empty() : tensor<1x64xf32>
+    %603 = "ttir.relu"(%601, %602) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %604 = tensor.empty() : tensor<1x64xf32>
+    %605 = "ttir.relu"(%603, %604) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %606 = tensor.empty() : tensor<1x64xf32>
+    %607 = "ttir.relu"(%605, %606) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %608 = tensor.empty() : tensor<1x64xf32>
+    %609 = "ttir.relu"(%607, %608) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %610 = tensor.empty() : tensor<1x64xf32>
+    %611 = "ttir.relu"(%609, %610) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %612 = tensor.empty() : tensor<1x64xf32>
+    %613 = "ttir.relu"(%611, %612) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %614 = tensor.empty() : tensor<1x64xf32>
+    %615 = "ttir.relu"(%613, %614) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %616 = tensor.empty() : tensor<1x64xf32>
+    %617 = "ttir.relu"(%615, %616) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %618 = tensor.empty() : tensor<1x64xf32>
+    %619 = "ttir.relu"(%617, %618) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %620 = tensor.empty() : tensor<1x64xf32>
+    %621 = "ttir.relu"(%619, %620) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %622 = tensor.empty() : tensor<1x64xf32>
+    %623 = "ttir.relu"(%621, %622) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %624 = tensor.empty() : tensor<1x64xf32>
+    %625 = "ttir.relu"(%623, %624) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %626 = tensor.empty() : tensor<1x64xf32>
+    %627 = "ttir.relu"(%625, %626) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %628 = tensor.empty() : tensor<1x64xf32>
+    %629 = "ttir.relu"(%627, %628) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %630 = tensor.empty() : tensor<1x64xf32>
+    %631 = "ttir.relu"(%629, %630) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %632 = tensor.empty() : tensor<1x64xf32>
+    %633 = "ttir.relu"(%631, %632) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %634 = tensor.empty() : tensor<1x64xf32>
+    %635 = "ttir.relu"(%633, %634) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %636 = tensor.empty() : tensor<1x64xf32>
+    %637 = "ttir.relu"(%635, %636) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %638 = tensor.empty() : tensor<1x64xf32>
+    %639 = "ttir.relu"(%637, %638) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %640 = tensor.empty() : tensor<1x64xf32>
+    %641 = "ttir.relu"(%639, %640) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %642 = tensor.empty() : tensor<1x64xf32>
+    %643 = "ttir.relu"(%641, %642) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %644 = tensor.empty() : tensor<1x64xf32>
+    %645 = "ttir.relu"(%643, %644) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %646 = tensor.empty() : tensor<1x64xf32>
+    %647 = "ttir.relu"(%645, %646) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %648 = tensor.empty() : tensor<1x64xf32>
+    %649 = "ttir.relu"(%647, %648) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %650 = tensor.empty() : tensor<1x64xf32>
+    %651 = "ttir.relu"(%649, %650) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %652 = tensor.empty() : tensor<1x64xf32>
+    %653 = "ttir.relu"(%651, %652) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %654 = tensor.empty() : tensor<1x64xf32>
+    %655 = "ttir.relu"(%653, %654) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %656 = tensor.empty() : tensor<1x64xf32>
+    %657 = "ttir.relu"(%655, %656) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %658 = tensor.empty() : tensor<1x64xf32>
+    %659 = "ttir.relu"(%657, %658) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %660 = tensor.empty() : tensor<1x64xf32>
+    %661 = "ttir.relu"(%659, %660) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %662 = tensor.empty() : tensor<1x64xf32>
+    %663 = "ttir.relu"(%661, %662) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %664 = tensor.empty() : tensor<1x64xf32>
+    %665 = "ttir.relu"(%663, %664) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %666 = tensor.empty() : tensor<1x64xf32>
+    %667 = "ttir.relu"(%665, %666) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %668 = tensor.empty() : tensor<1x64xf32>
+    %669 = "ttir.relu"(%667, %668) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %670 = tensor.empty() : tensor<1x64xf32>
+    %671 = "ttir.relu"(%669, %670) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %672 = tensor.empty() : tensor<1x64xf32>
+    %673 = "ttir.relu"(%671, %672) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %674 = tensor.empty() : tensor<1x64xf32>
+    %675 = "ttir.relu"(%673, %674) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %676 = tensor.empty() : tensor<1x64xf32>
+    %677 = "ttir.relu"(%675, %676) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %678 = tensor.empty() : tensor<1x64xf32>
+    %679 = "ttir.relu"(%677, %678) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %680 = tensor.empty() : tensor<1x64xf32>
+    %681 = "ttir.relu"(%679, %680) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %682 = tensor.empty() : tensor<1x64xf32>
+    %683 = "ttir.relu"(%681, %682) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %684 = tensor.empty() : tensor<1x64xf32>
+    %685 = "ttir.relu"(%683, %684) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %686 = tensor.empty() : tensor<1x64xf32>
+    %687 = "ttir.relu"(%685, %686) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %688 = tensor.empty() : tensor<1x64xf32>
+    %689 = "ttir.relu"(%687, %688) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %690 = tensor.empty() : tensor<1x64xf32>
+    %691 = "ttir.relu"(%689, %690) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %692 = tensor.empty() : tensor<1x64xf32>
+    %693 = "ttir.relu"(%691, %692) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %694 = tensor.empty() : tensor<1x64xf32>
+    %695 = "ttir.relu"(%693, %694) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %696 = tensor.empty() : tensor<1x64xf32>
+    %697 = "ttir.relu"(%695, %696) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %698 = tensor.empty() : tensor<1x64xf32>
+    %699 = "ttir.relu"(%697, %698) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %700 = tensor.empty() : tensor<1x64xf32>
+    %701 = "ttir.relu"(%699, %700) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %702 = tensor.empty() : tensor<1x64xf32>
+    %703 = "ttir.relu"(%701, %702) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %704 = tensor.empty() : tensor<1x64xf32>
+    %705 = "ttir.relu"(%703, %704) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %706 = tensor.empty() : tensor<1x64xf32>
+    %707 = "ttir.relu"(%705, %706) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %708 = tensor.empty() : tensor<1x64xf32>
+    %709 = "ttir.relu"(%707, %708) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %710 = tensor.empty() : tensor<1x64xf32>
+    %711 = "ttir.relu"(%709, %710) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %712 = tensor.empty() : tensor<1x64xf32>
+    %713 = "ttir.relu"(%711, %712) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %714 = tensor.empty() : tensor<1x64xf32>
+    %715 = "ttir.relu"(%713, %714) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %716 = tensor.empty() : tensor<1x64xf32>
+    %717 = "ttir.relu"(%715, %716) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %718 = tensor.empty() : tensor<1x64xf32>
+    %719 = "ttir.relu"(%717, %718) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %720 = tensor.empty() : tensor<1x64xf32>
+    %721 = "ttir.relu"(%719, %720) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %722 = tensor.empty() : tensor<1x64xf32>
+    %723 = "ttir.relu"(%721, %722) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %724 = tensor.empty() : tensor<1x64xf32>
+    %725 = "ttir.relu"(%723, %724) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %726 = tensor.empty() : tensor<1x64xf32>
+    %727 = "ttir.relu"(%725, %726) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %728 = tensor.empty() : tensor<1x64xf32>
+    %729 = "ttir.relu"(%727, %728) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %730 = tensor.empty() : tensor<1x64xf32>
+    %731 = "ttir.relu"(%729, %730) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %732 = tensor.empty() : tensor<1x64xf32>
+    %733 = "ttir.relu"(%731, %732) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %734 = tensor.empty() : tensor<1x64xf32>
+    %735 = "ttir.relu"(%733, %734) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %736 = tensor.empty() : tensor<1x64xf32>
+    %737 = "ttir.relu"(%735, %736) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %738 = tensor.empty() : tensor<1x64xf32>
+    %739 = "ttir.relu"(%737, %738) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %740 = tensor.empty() : tensor<1x64xf32>
+    %741 = "ttir.relu"(%739, %740) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %742 = tensor.empty() : tensor<1x64xf32>
+    %743 = "ttir.relu"(%741, %742) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %744 = tensor.empty() : tensor<1x64xf32>
+    %745 = "ttir.relu"(%743, %744) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %746 = tensor.empty() : tensor<1x64xf32>
+    %747 = "ttir.relu"(%745, %746) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %748 = tensor.empty() : tensor<1x64xf32>
+    %749 = "ttir.relu"(%747, %748) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %750 = tensor.empty() : tensor<1x64xf32>
+    %751 = "ttir.relu"(%749, %750) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %752 = tensor.empty() : tensor<1x64xf32>
+    %753 = "ttir.relu"(%751, %752) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %754 = tensor.empty() : tensor<1x64xf32>
+    %755 = "ttir.relu"(%753, %754) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %756 = tensor.empty() : tensor<1x64xf32>
+    %757 = "ttir.relu"(%755, %756) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %758 = tensor.empty() : tensor<1x64xf32>
+    %759 = "ttir.relu"(%757, %758) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %760 = tensor.empty() : tensor<1x64xf32>
+    %761 = "ttir.relu"(%759, %760) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %762 = tensor.empty() : tensor<1x64xf32>
+    %763 = "ttir.relu"(%761, %762) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %764 = tensor.empty() : tensor<1x64xf32>
+    %765 = "ttir.relu"(%763, %764) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %766 = tensor.empty() : tensor<1x64xf32>
+    %767 = "ttir.relu"(%765, %766) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %768 = tensor.empty() : tensor<1x64xf32>
+    %769 = "ttir.relu"(%767, %768) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %770 = tensor.empty() : tensor<1x64xf32>
+    %771 = "ttir.relu"(%769, %770) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %772 = tensor.empty() : tensor<1x64xf32>
+    %773 = "ttir.relu"(%771, %772) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %774 = tensor.empty() : tensor<1x64xf32>
+    %775 = "ttir.relu"(%773, %774) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %776 = tensor.empty() : tensor<1x64xf32>
+    %777 = "ttir.relu"(%775, %776) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %778 = tensor.empty() : tensor<1x64xf32>
+    %779 = "ttir.relu"(%777, %778) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %780 = tensor.empty() : tensor<1x64xf32>
+    %781 = "ttir.relu"(%779, %780) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %782 = tensor.empty() : tensor<1x64xf32>
+    %783 = "ttir.relu"(%781, %782) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %784 = tensor.empty() : tensor<1x64xf32>
+    %785 = "ttir.relu"(%783, %784) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %786 = tensor.empty() : tensor<1x64xf32>
+    %787 = "ttir.relu"(%785, %786) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %788 = tensor.empty() : tensor<1x64xf32>
+    %789 = "ttir.relu"(%787, %788) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %790 = tensor.empty() : tensor<1x64xf32>
+    %791 = "ttir.relu"(%789, %790) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %792 = tensor.empty() : tensor<1x64xf32>
+    %793 = "ttir.relu"(%791, %792) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %794 = tensor.empty() : tensor<1x64xf32>
+    %795 = "ttir.relu"(%793, %794) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %796 = tensor.empty() : tensor<1x64xf32>
+    %797 = "ttir.relu"(%795, %796) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %798 = tensor.empty() : tensor<1x64xf32>
+    %799 = "ttir.relu"(%797, %798) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %800 = tensor.empty() : tensor<1x64xf32>
+    %801 = "ttir.relu"(%799, %800) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %802 = tensor.empty() : tensor<1x64xf32>
+    %803 = "ttir.relu"(%801, %802) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %804 = tensor.empty() : tensor<1x64xf32>
+    %805 = "ttir.relu"(%803, %804) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %806 = tensor.empty() : tensor<1x64xf32>
+    %807 = "ttir.relu"(%805, %806) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %808 = tensor.empty() : tensor<1x64xf32>
+    %809 = "ttir.relu"(%807, %808) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %810 = tensor.empty() : tensor<1x64xf32>
+    %811 = "ttir.relu"(%809, %810) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %812 = tensor.empty() : tensor<1x64xf32>
+    %813 = "ttir.relu"(%811, %812) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %814 = tensor.empty() : tensor<1x64xf32>
+    %815 = "ttir.relu"(%813, %814) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %816 = tensor.empty() : tensor<1x64xf32>
+    %817 = "ttir.relu"(%815, %816) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %818 = tensor.empty() : tensor<1x64xf32>
+    %819 = "ttir.relu"(%817, %818) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %820 = tensor.empty() : tensor<1x64xf32>
+    %821 = "ttir.relu"(%819, %820) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %822 = tensor.empty() : tensor<1x64xf32>
+    %823 = "ttir.relu"(%821, %822) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %824 = tensor.empty() : tensor<1x64xf32>
+    %825 = "ttir.relu"(%823, %824) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %826 = tensor.empty() : tensor<1x64xf32>
+    %827 = "ttir.relu"(%825, %826) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %828 = tensor.empty() : tensor<1x64xf32>
+    %829 = "ttir.relu"(%827, %828) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %830 = tensor.empty() : tensor<1x64xf32>
+    %831 = "ttir.relu"(%829, %830) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %832 = tensor.empty() : tensor<1x64xf32>
+    %833 = "ttir.relu"(%831, %832) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %834 = tensor.empty() : tensor<1x64xf32>
+    %835 = "ttir.relu"(%833, %834) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %836 = tensor.empty() : tensor<1x64xf32>
+    %837 = "ttir.relu"(%835, %836) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %838 = tensor.empty() : tensor<1x64xf32>
+    %839 = "ttir.relu"(%837, %838) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %840 = tensor.empty() : tensor<1x64xf32>
+    %841 = "ttir.relu"(%839, %840) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %842 = tensor.empty() : tensor<1x64xf32>
+    %843 = "ttir.relu"(%841, %842) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %844 = tensor.empty() : tensor<1x64xf32>
+    %845 = "ttir.relu"(%843, %844) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %846 = tensor.empty() : tensor<1x64xf32>
+    %847 = "ttir.relu"(%845, %846) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %848 = tensor.empty() : tensor<1x64xf32>
+    %849 = "ttir.relu"(%847, %848) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %850 = tensor.empty() : tensor<1x64xf32>
+    %851 = "ttir.relu"(%849, %850) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %852 = tensor.empty() : tensor<1x64xf32>
+    %853 = "ttir.relu"(%851, %852) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %854 = tensor.empty() : tensor<1x64xf32>
+    %855 = "ttir.relu"(%853, %854) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %856 = tensor.empty() : tensor<1x64xf32>
+    %857 = "ttir.relu"(%855, %856) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %858 = tensor.empty() : tensor<1x64xf32>
+    %859 = "ttir.relu"(%857, %858) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %860 = tensor.empty() : tensor<1x64xf32>
+    %861 = "ttir.relu"(%859, %860) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %862 = tensor.empty() : tensor<1x64xf32>
+    %863 = "ttir.relu"(%861, %862) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %864 = tensor.empty() : tensor<1x64xf32>
+    %865 = "ttir.relu"(%863, %864) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %866 = tensor.empty() : tensor<1x64xf32>
+    %867 = "ttir.relu"(%865, %866) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %868 = tensor.empty() : tensor<1x64xf32>
+    %869 = "ttir.relu"(%867, %868) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %870 = tensor.empty() : tensor<1x64xf32>
+    %871 = "ttir.relu"(%869, %870) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %872 = tensor.empty() : tensor<1x64xf32>
+    %873 = "ttir.relu"(%871, %872) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %874 = tensor.empty() : tensor<1x64xf32>
+    %875 = "ttir.relu"(%873, %874) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %876 = tensor.empty() : tensor<1x64xf32>
+    %877 = "ttir.relu"(%875, %876) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %878 = tensor.empty() : tensor<1x64xf32>
+    %879 = "ttir.relu"(%877, %878) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %880 = tensor.empty() : tensor<1x64xf32>
+    %881 = "ttir.relu"(%879, %880) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %882 = tensor.empty() : tensor<1x64xf32>
+    %883 = "ttir.relu"(%881, %882) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %884 = tensor.empty() : tensor<1x64xf32>
+    %885 = "ttir.relu"(%883, %884) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %886 = tensor.empty() : tensor<1x64xf32>
+    %887 = "ttir.relu"(%885, %886) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %888 = tensor.empty() : tensor<1x64xf32>
+    %889 = "ttir.relu"(%887, %888) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %890 = tensor.empty() : tensor<1x64xf32>
+    %891 = "ttir.relu"(%889, %890) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %892 = tensor.empty() : tensor<1x64xf32>
+    %893 = "ttir.relu"(%891, %892) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %894 = tensor.empty() : tensor<1x64xf32>
+    %895 = "ttir.relu"(%893, %894) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %896 = tensor.empty() : tensor<1x64xf32>
+    %897 = "ttir.relu"(%895, %896) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %898 = tensor.empty() : tensor<1x64xf32>
+    %899 = "ttir.relu"(%897, %898) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %900 = tensor.empty() : tensor<1x64xf32>
+    %901 = "ttir.relu"(%899, %900) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %902 = tensor.empty() : tensor<1x64xf32>
+    %903 = "ttir.relu"(%901, %902) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %904 = tensor.empty() : tensor<1x64xf32>
+    %905 = "ttir.relu"(%903, %904) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %906 = tensor.empty() : tensor<1x64xf32>
+    %907 = "ttir.relu"(%905, %906) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %908 = tensor.empty() : tensor<1x64xf32>
+    %909 = "ttir.relu"(%907, %908) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %910 = tensor.empty() : tensor<1x64xf32>
+    %911 = "ttir.relu"(%909, %910) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %912 = tensor.empty() : tensor<1x64xf32>
+    %913 = "ttir.relu"(%911, %912) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %914 = tensor.empty() : tensor<1x64xf32>
+    %915 = "ttir.relu"(%913, %914) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %916 = tensor.empty() : tensor<1x64xf32>
+    %917 = "ttir.relu"(%915, %916) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %918 = tensor.empty() : tensor<1x64xf32>
+    %919 = "ttir.relu"(%917, %918) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %920 = tensor.empty() : tensor<1x64xf32>
+    %921 = "ttir.relu"(%919, %920) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %922 = tensor.empty() : tensor<1x64xf32>
+    %923 = "ttir.relu"(%921, %922) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %924 = tensor.empty() : tensor<1x64xf32>
+    %925 = "ttir.relu"(%923, %924) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %926 = tensor.empty() : tensor<1x64xf32>
+    %927 = "ttir.relu"(%925, %926) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %928 = tensor.empty() : tensor<1x64xf32>
+    %929 = "ttir.relu"(%927, %928) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %930 = tensor.empty() : tensor<1x64xf32>
+    %931 = "ttir.relu"(%929, %930) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %932 = tensor.empty() : tensor<1x64xf32>
+    %933 = "ttir.relu"(%931, %932) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %934 = tensor.empty() : tensor<1x64xf32>
+    %935 = "ttir.relu"(%933, %934) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %936 = tensor.empty() : tensor<1x64xf32>
+    %937 = "ttir.relu"(%935, %936) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %938 = tensor.empty() : tensor<1x64xf32>
+    %939 = "ttir.relu"(%937, %938) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %940 = tensor.empty() : tensor<1x64xf32>
+    %941 = "ttir.relu"(%939, %940) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %942 = tensor.empty() : tensor<1x64xf32>
+    %943 = "ttir.relu"(%941, %942) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %944 = tensor.empty() : tensor<1x64xf32>
+    %945 = "ttir.relu"(%943, %944) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %946 = tensor.empty() : tensor<1x64xf32>
+    %947 = "ttir.relu"(%945, %946) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %948 = tensor.empty() : tensor<1x64xf32>
+    %949 = "ttir.relu"(%947, %948) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %950 = tensor.empty() : tensor<1x64xf32>
+    %951 = "ttir.relu"(%949, %950) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %952 = tensor.empty() : tensor<1x64xf32>
+    %953 = "ttir.relu"(%951, %952) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %954 = tensor.empty() : tensor<1x64xf32>
+    %955 = "ttir.relu"(%953, %954) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %956 = tensor.empty() : tensor<1x64xf32>
+    %957 = "ttir.relu"(%955, %956) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %958 = tensor.empty() : tensor<1x64xf32>
+    %959 = "ttir.relu"(%957, %958) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %960 = tensor.empty() : tensor<1x64xf32>
+    %961 = "ttir.relu"(%959, %960) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %962 = tensor.empty() : tensor<1x64xf32>
+    %963 = "ttir.relu"(%961, %962) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %964 = tensor.empty() : tensor<1x64xf32>
+    %965 = "ttir.relu"(%963, %964) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %966 = tensor.empty() : tensor<1x64xf32>
+    %967 = "ttir.relu"(%965, %966) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %968 = tensor.empty() : tensor<1x64xf32>
+    %969 = "ttir.relu"(%967, %968) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %970 = tensor.empty() : tensor<1x64xf32>
+    %971 = "ttir.relu"(%969, %970) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %972 = tensor.empty() : tensor<1x64xf32>
+    %973 = "ttir.relu"(%971, %972) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %974 = tensor.empty() : tensor<1x64xf32>
+    %975 = "ttir.relu"(%973, %974) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %976 = tensor.empty() : tensor<1x64xf32>
+    %977 = "ttir.relu"(%975, %976) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %978 = tensor.empty() : tensor<1x64xf32>
+    %979 = "ttir.relu"(%977, %978) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %980 = tensor.empty() : tensor<1x64xf32>
+    %981 = "ttir.relu"(%979, %980) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %982 = tensor.empty() : tensor<1x64xf32>
+    %983 = "ttir.relu"(%981, %982) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %984 = tensor.empty() : tensor<1x64xf32>
+    %985 = "ttir.relu"(%983, %984) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %986 = tensor.empty() : tensor<1x64xf32>
+    %987 = "ttir.relu"(%985, %986) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %988 = tensor.empty() : tensor<1x64xf32>
+    %989 = "ttir.relu"(%987, %988) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %990 = tensor.empty() : tensor<1x64xf32>
+    %991 = "ttir.relu"(%989, %990) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %992 = tensor.empty() : tensor<1x64xf32>
+    %993 = "ttir.relu"(%991, %992) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %994 = tensor.empty() : tensor<1x64xf32>
+    %995 = "ttir.relu"(%993, %994) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %996 = tensor.empty() : tensor<1x64xf32>
+    %997 = "ttir.relu"(%995, %996) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %998 = tensor.empty() : tensor<1x64xf32>
+    %999 = "ttir.relu"(%997, %998) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    return %999 : tensor<1x64xf32>
+  }
+}
diff --git a/tools/explorer/test/run_tests.py b/tools/explorer/test/run_tests.py
index ceff14ae0..de1003f00 100644
--- a/tools/explorer/test/run_tests.py
+++ b/tools/explorer/test/run_tests.py
@@ -13,11 +13,19 @@
 PORT = 8002
 COMMAND_URL = "http://" + HOST + ":" + str(PORT) + "/apipost/v1/send_command"
 TEST_LOAD_MODEL_PATHS = [
-    "test/ttmlir/Dialect/TTNN/optimizer/mnist_sharding.mlir",
-    "tools/explorer/test/models/*.mlir",
+    "tools/explorer/test/models/forward_and_backward.mlir",
+    "tools/explorer/test/models/test_1k_ops.mlir",
+    "tools/explorer/test/models/linear_autoencoder.mlir",
+    "tools/explorer/test/models/resnet_ttir.mlir",
+    "tools/explorer/test/models/llama_attention_no_rot_emb_ttir.mlir",
+    "tools/explorer/test/models/open_llama_3b_single_layer.mlir",
 ]
+# MNIST model path shared by TEST_EXECUTE_MODEL_PATHS and the execute tests.
+MNIST_SHARDING_TILED_PATH = (
+    "test/ttmlir/Silicon/TTNN/optimizer/mnist_sharding_tiled.mlir"
+)
 TEST_EXECUTE_MODEL_PATHS = [
-    "test/ttmlir/Silicon/TTNN/optimizer/mnist_sharding_tiled.mlir",
+    MNIST_SHARDING_TILED_PATH,
 ]
 
 
@@ -97,14 +105,14 @@ def test_execute_model(model_path):
 
 def test_execute_mnist_l1_interleaved():
     execute_command(
-        "test/ttmlir/Silicon/TTNN/optimizer/mnist_sharding_tiled.mlir",
+        MNIST_SHARDING_TILED_PATH,
         {"optimizationPolicy": "L1 Interleaved"},
     )
 
 
 def test_execute_mnist_optimizer_disabled():
     execute_command(
-        "test/ttmlir/Silicon/TTNN/optimizer/mnist_sharding_tiled.mlir",
+        MNIST_SHARDING_TILED_PATH,
         {"optimizationPolicy": "Optimizer Disabled"},
     )