Incorrect TensorMemoryLayout set on op in TTNN dialect #1364

Closed
svuckovicTT opened this issue Nov 21, 2024 · 3 comments

@svuckovicTT
Contributor

Repro branch: https://github.com/tenstorrent/tt-mlir/commits/svuckovic/repro-tensor-mem-layout-issue/

Running:

./build/bin/ttmlir-opt --ttir-to-ttnn-backend-pipeline --convert-ttnn-to-emitc test/ttmlir/Silicon/TTNN/emitc/simple_add.mlir

results in:

Unsupported ttnn::TensorMemoryLayout
UNREACHABLE executed at /localdev/svuckovic/_workspace/repos/tt-mlir/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp:90!
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.

It looks like TensorMemoryLayout gets set to None in lib/Dialect/TTNN/Transforms/TTNNLayout.cpp.
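
For context, here is a minimal, self-contained sketch of the kind of enum-to-string switch that produces this crash. The enum and function below are hypothetical stand-ins, not the actual tt-mlir code in TTNNToEmitC.cpp; the point is that a layout value with no explicit case (such as an unset/None layout) falls through to llvm_unreachable and aborts the compiler with the message above.

// A minimal sketch, assuming a simplified standalone enum (hypothetical,
// not the actual ttnn::TensorMemoryLayout definition).
#include "llvm/Support/ErrorHandling.h"
#include <string>

enum class TensorMemoryLayout {
  None,
  Interleaved,
  HeightSharded,
  WidthSharded,
  BlockSharded
};

static std::string convertTensorMemoryLayout(TensorMemoryLayout layout) {
  switch (layout) {
  case TensorMemoryLayout::Interleaved:
    return "ttnn::TensorMemoryLayout::INTERLEAVED";
  case TensorMemoryLayout::HeightSharded:
    return "ttnn::TensorMemoryLayout::HEIGHT_SHARDED";
  case TensorMemoryLayout::WidthSharded:
    return "ttnn::TensorMemoryLayout::WIDTH_SHARDED";
  case TensorMemoryLayout::BlockSharded:
    return "ttnn::TensorMemoryLayout::BLOCK_SHARDED";
  default: // None (and any unhandled value) lands here and aborts.
    llvm_unreachable("Unsupported ttnn::TensorMemoryLayout");
  }
}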

@mtopalovicTT can you please take a look?

@svuckovicTT
Contributor Author

To add to this, here is what the IR in the TTNN dialect (before lowering to EmitC) looks like:

#device = #tt.device<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>
#dram = #ttnn.buffer_type<dram>
#system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,  1x0,  1x1,  1x2,  1x3,  1x4,  1x5,  1x6,  1x7,  2x0,  2x1,  2x2,  2x3,  2x4,  2x5,  2x6,  2x7,  3x0,  3x1,  3x2,  3x3,  3x4,  3x5,  3x6,  3x7,  4x0,  4x1,  4x2,  4x3,  4x4,  4x5,  4x6,  4x7,  5x0,  5x1,  5x2,  5x3,  5x4,  5x5,  5x6,  5x7,  6x0,  6x1,  6x2,  6x3,  6x4,  6x5,  6x6,  6x7,  7x0,  7x1,  7x2,  7x3,  7x4,  7x5,  7x6,  7x7] dram = [ 8x0,  9x0,  10x0,  8x1,  9x1,  10x1,  8x2,  9x2,  10x2,  8x3,  9x3,  10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>], supported_tile_sizes = [ 4x16,  16x16,  32x16,  4x32,  16x32,  32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>
#system_memory = #ttnn.buffer_type<system_memory>
#ttnn_layout = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<32x32xbf16, #system_memory>>
#ttnn_layout1 = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, bf16>, #dram>>
module attributes {tt.device = #device, tt.system_desc = #system_desc} {
  func.func @add(%arg0: tensor<32x32xbf16, #ttnn_layout>, %arg1: tensor<32x32xbf16, #ttnn_layout>) -> tensor<32x32xbf16, #ttnn_layout> {
    %0 = "ttnn.get_device"() <{mesh_shape = #ttnn<mesh_shape 1x1>}> : () -> !tt.device<#device>
    %1 = "ttnn.to_device"(%arg0, %0) <{memory_config = #ttnn.memory_config<<none>, #dram, <<1x1>>>}> : (tensor<32x32xbf16, #ttnn_layout>, !tt.device<#device>) -> tensor<32x32xbf16, #ttnn_layout1>
    %2 = "ttnn.to_layout"(%1) <{layout = #ttnn.layout<tile>}> : (tensor<32x32xbf16, #ttnn_layout1>) -> tensor<32x32xbf16, #ttnn_layout1>
    "ttnn.deallocate"(%1) <{force = false}> : (tensor<32x32xbf16, #ttnn_layout1>) -> ()
    %3 = "ttnn.to_device"(%arg1, %0) <{memory_config = #ttnn.memory_config<<none>, #dram, <<1x1>>>}> : (tensor<32x32xbf16, #ttnn_layout>, !tt.device<#device>) -> tensor<32x32xbf16, #ttnn_layout1>
    %4 = "ttnn.to_layout"(%3) <{layout = #ttnn.layout<tile>}> : (tensor<32x32xbf16, #ttnn_layout1>) -> tensor<32x32xbf16, #ttnn_layout1>
    "ttnn.deallocate"(%3) <{force = false}> : (tensor<32x32xbf16, #ttnn_layout1>) -> ()
    %5 = "ttnn.empty"() <{dtype = #tt.supportedDataTypes<bf16>, layout = #ttnn.layout<tile>, shape = #ttnn.shape<32x32>}> : () -> tensor<32x32xbf16, #ttnn_layout1>
    %6 = "ttnn.add"(%2, %4, %5) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<32x32xbf16, #ttnn_layout1>, tensor<32x32xbf16, #ttnn_layout1>, tensor<32x32xbf16, #ttnn_layout1>) -> tensor<32x32xbf16, #ttnn_layout1>
    "ttnn.deallocate"(%4) <{force = false}> : (tensor<32x32xbf16, #ttnn_layout1>) -> ()
    "ttnn.deallocate"(%2) <{force = false}> : (tensor<32x32xbf16, #ttnn_layout1>) -> ()
    %7 = "ttnn.from_device"(%6) : (tensor<32x32xbf16, #ttnn_layout1>) -> tensor<32x32xbf16, #ttnn_layout>
    "ttnn.deallocate"(%5) <{force = false}> : (tensor<32x32xbf16, #ttnn_layout1>) -> ()
    %8 = "ttnn.to_layout"(%7) <{layout = #ttnn.layout<row_major>}> : (tensor<32x32xbf16, #ttnn_layout>) -> tensor<32x32xbf16, #ttnn_layout>
    "ttnn.deallocate"(%7) <{force = false}> : (tensor<32x32xbf16, #ttnn_layout>) -> ()
    return %8 : tensor<32x32xbf16, #ttnn_layout>
  }
}

The to_device ops that produce %1 and %3 have <none> as the TensorMemoryLayout in their memory_config attributes.
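
One way to avoid the crash would be for the layout pass to default device-resident tensors to an interleaved layout instead of leaving the value unset. The helper below is purely illustrative (it reuses the hypothetical TensorMemoryLayout enum from the sketch above and a hypothetical BufferType enum), and is not necessarily how the issue was actually resolved.

// Hypothetical defaulting helper (illustrative only): device buffers
// (DRAM/L1) get an interleaved layout when nothing more specific was
// chosen, while host (system memory) tensors carry no tensor memory layout.
enum class BufferType { DRAM, L1, SystemMemory };

static TensorMemoryLayout pickLayout(BufferType bufferType,
                                     TensorMemoryLayout requested) {
  if (requested != TensorMemoryLayout::None)
    return requested; // Respect an explicit choice (e.g. a sharded layout).
  if (bufferType == BufferType::SystemMemory)
    return TensorMemoryLayout::None; // Host tensors stay layout-less.
  return TensorMemoryLayout::Interleaved; // Default for device memory.
}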

@svuckovicTT
Contributor Author

@mtopalovicTT this can be closed, right?

@mtopalovicTT
Contributor

Missed the notification. Yes, this can be closed.
