Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BinaryOpType cannot be mapped to BcastOpMath in greater & not_equal ops #1080

Open
kamalrajkannan78 opened this issue Oct 28, 2024 · 3 comments
Labels
MLIR Ops Issues related to MLIR dialect ops and their implementations
Milestone

Comments

@kamalrajkannan78
Copy link

kamalrajkannan78 commented Oct 28, 2024

Description

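  • The greater and not_equal ops fail at runtime with the error "BinaryOpType cannot be mapped to BcastOpMath". In both failing cases the second operand is a tensor<1xf32> that has to be broadcast against the larger first operand (see the TTIR graphs below).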

Reproduce

git checkout kkannan/greator_and_not_equal_mlir_op_issue
git submodule update --recursive
cmake --build build -- install_ttforge
pytest forge/test/mlir/test_ops.py::test_greater[shape_x5-1] -svv
pytest forge/test/mlir/test_ops.py -k "not_equal" -svv

Observed Behaviour

E       RuntimeError: TT_THROW @ /proj_sw/user_dev/kkannan/forge_oct21/tt-forge-fe/third_party/tt-mlir/third_party/tt-metal/src/tt-metal/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp:26: tt::exception
E       info:
E       BinaryOpType cannot be mapped to BcastOpMath

Logs

TTIR graphs

greater op

module @greater attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,  1x0,  1x1,  1x2,  1x3,  1x4,  1x5,  1x6,  1x7,  2x0,  2x1,  2x2,  2x3,  2x4,  2x5,  2x6,  2x7,  3x0,  3x1,  3x2,  3x3,  3x4,  3x5,  3x6,  3x7,  4x0,  4x1,  4x2,  4x3,  4x4,  4x5,  4x6,  4x7,  5x0,  5x1,  5x2,  5x3,  5x4,  5x5,  5x6,  5x7,  6x0,  6x1,  6x2,  6x3,  6x4,  6x5,  6x6,  6x7,  7x0,  7x1,  7x2,  7x3,  7x4,  7x5,  7x6,  7x7] dram = [ 8x0,  9x0,  10x0,  8x1,  9x1,  10x1,  8x2,  9x2,  10x2,  8x3,  9x3,  10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>], supported_tile_sizes = [ 4x16,  16x16,  32x16,  4x32,  16x32,  32x32]}], [0], [3 : i32], [ 0x0x0x0]>} {
  func.func @forward(%arg0: tensor<1x1x32x32xf32> {ttir.name = "x"}, %arg1: tensor<1xf32> {ttir.name = "y"}) -> (tensor<1x1x32x32xf32> {ttir.name = "greater.output_greater_0"}) {
    %0 = tensor.empty() : tensor<1x1x32x32xf32>
    %1 = "ttir.gt"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x1x32x32xf32>, tensor<1xf32>, tensor<1x1x32x32xf32>) -> tensor<1x1x32x32xf32>
    return %1 : tensor<1x1x32x32xf32>
  }
}

not_equal op

module @not_equal attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,  1x0,  1x1,  1x2,  1x3,  1x4,  1x5,  1x6,  1x7,  2x0,  2x1,  2x2,  2x3,  2x4,  2x5,  2x6,  2x7,  3x0,  3x1,  3x2,  3x3,  3x4,  3x5,  3x6,  3x7,  4x0,  4x1,  4x2,  4x3,  4x4,  4x5,  4x6,  4x7,  5x0,  5x1,  5x2,  5x3,  5x4,  5x5,  5x6,  5x7,  6x0,  6x1,  6x2,  6x3,  6x4,  6x5,  6x6,  6x7,  7x0,  7x1,  7x2,  7x3,  7x4,  7x5,  7x6,  7x7] dram = [ 8x0,  9x0,  10x0,  8x1,  9x1,  10x1,  8x2,  9x2,  10x2,  8x3,  9x3,  10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>], supported_tile_sizes = [ 4x16,  16x16,  32x16,  4x32,  16x32,  32x32]}], [0], [3 : i32], [ 0x0x0x0]>} {
  func.func @forward(%arg0: tensor<1x128xf32> {ttir.name = "x"}, %arg1: tensor<1xf32> {ttir.name = "y"}) -> (tensor<1x128xf32> {ttir.name = "not_equal.output_not_equal_0"}) {
    %0 = tensor.empty() : tensor<1x128xf32>
    %1 = "ttir.ne"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x128xf32>, tensor<1xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
    return %1 : tensor<1x128xf32>
  }
}
@kamalrajkannan78 kamalrajkannan78 added the MLIR Ops Issues related to MLIR dialect ops and their implementations label Oct 28, 2024
@kamalrajkannan78 kamalrajkannan78 added this to the [FFE] Bringup milestone Oct 28, 2024
@nvukobratTT
Copy link
Contributor

@kamalrajkannan78 can you add TTIR for this issue as well? That way, it's much easier for MLIR folks to reproduce these issues without the need of running Forge-Fe as well :))

@kamalrajkannan78
Copy link
Author

@kamalrajkannan78 can you add TTIR for this issue as well? That way, it's much easier for MLIR folks to reproduce these issues without the need of running Forge-Fe as well :))

TTIR graphs are present in attached log files @nvukobratTT

@kamalrajkannan78
Copy link
Author

TTIR graphs are attached.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
MLIR Ops Issues related to MLIR dialect ops and their implementations
Projects
None yet
Development

No branches or pull requests

2 participants