module @AvgPool2d attributes {
    tt.device = #tt.device<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>,
    tt.system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 1x0, 1x1, 1x2, 1x3, 1x4, 1x5, 1x6, 1x7, 2x0, 2x1, 2x2, 2x3, 2x4, 2x5, 2x6, 2x7, 3x0, 3x1, 3x2, 3x3, 3x4, 3x5, 3x6, 3x7, 4x0, 4x1, 4x2, 4x3, 4x4, 4x5, 4x6, 4x7, 5x0, 5x1, 5x2, 5x3, 5x4, 5x5, 5x6, 5x7, 6x0, 6x1, 6x2, 6x3, 6x4, 6x5, 6x6, 6x7, 7x0, 7x1, 7x2, 7x3, 7x4, 7x5, 7x6, 7x7] dram = [ 8x0, 9x0, 10x0, 8x1, 9x1, 10x1, 8x2, 9x2, 10x2, 8x3, 9x3, 10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>], supported_tile_sizes = [ 4x16, 16x16, 32x16, 4x32, 16x32, 32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>} {
  func.func @forward(%arg0: tensor<1x2048x7x7xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 7 + d2, d3), <1x1>, memref<14336x7xf32, #ttnn.buffer_type<system_memory>>>> {ttir.name = "x"}) -> (tensor<1x2048x1x1xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), <1x1>, memref<2048x1xf32, #ttnn.buffer_type<system_memory>>>> {ttir.name = "AvgPool2d.output_avg_pool2d_0"}) {
    %0 = "ttnn.get_device"() <{mesh_shape = #ttnn<mesh_shape 1x1>}> : () -> !tt.device<<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>>
    %1 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<1x2048x7x7xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 7 + d2, d3), <1x1>, memref<14336x7xf32, #ttnn.buffer_type<system_memory>>>>) -> tensor<1x2048x7x7xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 7 + d2, d3), <1x1>, memref<448x1x!tt.tile<32x32, f32>, #ttnn.buffer_type<dram>>, interleaved>>
    %2 = "ttnn.to_device"(%1, %0) <{memory_config = #ttnn.memory_config<<interleaved>, <dram>, <<448x1>>>}> : (tensor<1x2048x7x7xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 7 + d2, d3), <1x1>, memref<448x1x!tt.tile<32x32, f32>, #ttnn.buffer_type<dram>>, interleaved>>, !tt.device<<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>>) -> tensor<1x2048x7x7xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 7 + d2, d3), <1x1>, memref<448x1x!tt.tile<32x32, f32>, #ttnn.buffer_type<dram>>, interleaved>>
    "ttnn.deallocate"(%1) <{force = false}> : (tensor<1x2048x7x7xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 7 + d2, d3), <1x1>, memref<448x1x!tt.tile<32x32, f32>, #ttnn.buffer_type<dram>>, interleaved>>) -> ()
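    // The pooling window equals the full 7x7 input, so this AvgPool2d lowers to
    // a global mean over the spatial dims: flatten H and W into one axis of
    // 7 * 7 = 49 elements, move that axis innermost, reduce it with ttnn.mean,
    // then restore the NCHW output shape.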
    %3 = "ttnn.reshape"(%2) <{shape = [1 : i32, 1 : i32, 2048 : i32, 49 : i32]}> : (tensor<1x2048x7x7xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 7 + d2, d3), <1x1>, memref<448x1x!tt.tile<32x32, f32>, #ttnn.buffer_type<dram>>, interleaved>>) -> tensor<1x1x2048x49xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 * 2048 + d2, d3), <1x1>, memref<2048x49xf32, #ttnn.buffer_type<dram>>, interleaved>>
    "ttnn.deallocate"(%2) <{force = false}> : (tensor<1x2048x7x7xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 14336 + d1 * 7 + d2, d3), <1x1>, memref<448x1x!tt.tile<32x32, f32>, #ttnn.buffer_type<dram>>, interleaved>>) -> ()
    %4 = "ttnn.transpose"(%3) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x1x2048x49xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 * 2048 + d2, d3), <1x1>, memref<2048x49xf32, #ttnn.buffer_type<dram>>, interleaved>>) -> tensor<1x1x49x2048xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 49 + d1 * 49 + d2, d3), <1x1>, memref<49x2048xf32, #ttnn.buffer_type<dram>>, interleaved>>
    "ttnn.deallocate"(%3) <{force = false}> : (tensor<1x1x2048x49xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 * 2048 + d2, d3), <1x1>, memref<2048x49xf32, #ttnn.buffer_type<dram>>, interleaved>>) -> ()
    %5 = "ttnn.mean"(%4) <{keep_dim = true}> : (tensor<1x1x49x2048xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 49 + d1 * 49 + d2, d3), <1x1>, memref<49x2048xf32, #ttnn.buffer_type<dram>>, interleaved>>) -> tensor<1x1x1x2048xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 + d1 + d2, d3), <1x1>, memref<1x2048xf32, #ttnn.buffer_type<dram>>, interleaved>>
    "ttnn.deallocate"(%4) <{force = false}> : (tensor<1x1x49x2048xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 49 + d1 * 49 + d2, d3), <1x1>, memref<49x2048xf32, #ttnn.buffer_type<dram>>, interleaved>>) -> ()
    %6 = "ttnn.reshape"(%5) <{shape = [1 : i32, 2048 : i32, 1 : i32, 1 : i32]}> : (tensor<1x1x1x2048xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 + d1 + d2, d3), <1x1>, memref<1x2048xf32, #ttnn.buffer_type<dram>>, interleaved>>) -> tensor<1x2048x1x1xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), <1x1>, memref<2048x1xf32, #ttnn.buffer_type<dram>>, interleaved>>
    "ttnn.deallocate"(%5) <{force = false}> : (tensor<1x1x1x2048xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 + d1 + d2, d3), <1x1>, memref<1x2048xf32, #ttnn.buffer_type<dram>>, interleaved>>) -> ()
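    // The reduction is complete on device; read the result back to host and
    // convert it to row-major so the function returns the system-memory tensor
    // declared in its signature.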
    %7 = "ttnn.from_device"(%6) : (tensor<1x2048x1x1xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), <1x1>, memref<2048x1xf32, #ttnn.buffer_type<dram>>, interleaved>>) -> tensor<1x2048x1x1xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), <1x1>, memref<2048x1xf32, #ttnn.buffer_type<system_memory>>>>
    "ttnn.deallocate"(%6) <{force = false}> : (tensor<1x2048x1x1xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), <1x1>, memref<2048x1xf32, #ttnn.buffer_type<dram>>, interleaved>>) -> ()
    %8 = "ttnn.to_layout"(%7) <{layout = #ttnn.layout<row_major>}> : (tensor<1x2048x1x1xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), <1x1>, memref<2048x1xf32, #ttnn.buffer_type<system_memory>>>>) -> tensor<1x2048x1x1xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), <1x1>, memref<2048x1xf32, #ttnn.buffer_type<system_memory>>>>
    "ttnn.deallocate"(%7) <{force = false}> : (tensor<1x2048x1x1xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), <1x1>, memref<2048x1xf32, #ttnn.buffer_type<system_memory>>>>) -> ()
    return %8 : tensor<1x2048x1x1xf32, #ttnn.ttnn_layout<(d0, d1, d2, d3) -> (d0 * 2048 + d1 + d2, d3), <1x1>, memref<2048x1xf32, #ttnn.buffer_type<system_memory>>>>
  }
}
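
// Layout arithmetic for the dump above: the 1x2048x7x7 input collapses to a
// (1 * 2048 * 7) x 7 = 14336x7 row-major buffer on host; tilized into 32x32
// tiles that becomes ceil(14336 / 32) x ceil(7 / 32) = 448x1 tiles, which
// matches the <<448x1>> in the ttnn.to_device memory_config.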