diff --git a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
index df23eab7d7..bb415a48ad 100644
--- a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
+++ b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
@@ -61,8 +61,8 @@ def TT_ChipDescAttr : TT_Attr<"ChipDesc", "chip_desc"> {
     TT chip_desc attribute
   }];

-  let parameters = (ins "ArchAttr":$arch, TT_GridAttr:$grid, "unsigned":$l1Size, "unsigned":$numDramChannels, "unsigned":$dramChannelSize);
-  let assemblyFormat = "`{` `arch` `=` $arch `,` `grid` `=` $grid `,` `l1_size` `=` $l1Size `,` `num_dram_channels` `=` $numDramChannels `,` `dram_channel_size` `=` $dramChannelSize `}`";
+  let parameters = (ins "ArchAttr":$arch, TT_GridAttr:$grid, "unsigned":$l1Size, "unsigned":$numDramChannels, "unsigned":$dramChannelSize, "unsigned":$nocL1AddressAlignBytes, "unsigned":$pcieAddressAlignBytes, "unsigned":$nocDRAMAddressAlignBytes);
+  let assemblyFormat = "`{` `arch` `=` $arch `,` `grid` `=` $grid `,` `l1_size` `=` $l1Size `,` `num_dram_channels` `=` $numDramChannels `,` `dram_channel_size` `=` $dramChannelSize `,` `noc_l1_address_align_bytes` `=` $nocL1AddressAlignBytes `,` `pcie_address_align_bytes` `=` $pcieAddressAlignBytes `,` `noc_dram_address_align_bytes` `=` $nocDRAMAddressAlignBytes `}`";
 }

 def TT_ChipCoordAttr : TT_Attr<"ChipCoord", "chip_coord"> {
diff --git a/include/ttmlir/Target/Common/types.fbs b/include/ttmlir/Target/Common/types.fbs
index e626cee3e3..dd03a53c80 100644
--- a/include/ttmlir/Target/Common/types.fbs
+++ b/include/ttmlir/Target/Common/types.fbs
@@ -98,6 +98,9 @@ table ChipDesc {
   l1_size: uint64;
   num_dram_channels: uint32;
   dram_channel_size: uint64;
+  noc_l1_address_align_bytes: uint32;
+  pcie_address_align_bytes: uint32;
+  noc_dram_address_align_bytes: uint32;
 }

 struct ChipCoord {
diff --git a/include/ttmlir/Target/Utils/MLIRToFlatbuffer.h b/include/ttmlir/Target/Utils/MLIRToFlatbuffer.h
index 87e50e3652..b2d41034fc 100644
--- a/include/ttmlir/Target/Utils/MLIRToFlatbuffer.h
+++ b/include/ttmlir/Target/Utils/MLIRToFlatbuffer.h
@@ -132,7 +132,9 @@ toFlatbuffer(FlatbufferObjectCache &cache, ChipDescAttr chipDesc) {
   return ::tt::target::CreateChipDesc(
       *cache.fbb, toFlatbuffer(cache, chipDesc.getArch()), &grid,
       chipDesc.getL1Size(), chipDesc.getNumDramChannels(),
-      chipDesc.getDramChannelSize());
+      chipDesc.getDramChannelSize(), chipDesc.getNocL1AddressAlignBytes(),
+      chipDesc.getPcieAddressAlignBytes(),
+      chipDesc.getNocDRAMAddressAlignBytes());
 }

 template
diff --git a/lib/Dialect/TT/IR/TTOpsTypes.cpp b/lib/Dialect/TT/IR/TTOpsTypes.cpp
index 2c4d06edf2..e4e31f5345 100644
--- a/lib/Dialect/TT/IR/TTOpsTypes.cpp
+++ b/lib/Dialect/TT/IR/TTOpsTypes.cpp
@@ -26,7 +26,8 @@ mlir::tt::SystemDescAttr::getDefault(MLIRContext *context) {
       {
          tt::ChipDescAttr::get(
              context, tt::ArchAttr::get(context, tt::Arch::WormholeB0),
-             tt::GridAttr::get(context, {8, 8}), (1 << 20), 12, (1 << 20)),
+             tt::GridAttr::get(context, {8, 8}), (1 << 20), 12, (1 << 20), 16,
+             32, 32),
       },
       // Chip Descriptor Indices
       {
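For context on the serialization path above: flatc generates C++ accessors on table ChipDesc whose names match the schema field names one-to-one, so a consumer can read the three new fields back out of the flatbuffer directly. A minimal sketch; the generated header path below is an assumption for illustration, not part of this diff:

    // Hypothetical consumer-side sketch: reading the new alignment fields
    // through the flatc-generated accessors for table ChipDesc.
    #include <cstdio>

    #include "ttmlir/Target/Common/types_generated.h" // assumed header name

    void printAlignments(const ::tt::target::ChipDesc *chipDesc) {
      std::printf("noc_l1_address_align_bytes:   %u\n",
                  chipDesc->noc_l1_address_align_bytes());
      std::printf("pcie_address_align_bytes:     %u\n",
                  chipDesc->pcie_address_align_bytes());
      std::printf("noc_dram_address_align_bytes: %u\n",
                  chipDesc->noc_dram_address_align_bytes());
    }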
diff --git a/runtime/lib/ttnn/runtime.cpp b/runtime/lib/ttnn/runtime.cpp
index ee5ad46afe..2e8ac34b2e 100644
--- a/runtime/lib/ttnn/runtime.cpp
+++ b/runtime/lib/ttnn/runtime.cpp
@@ -9,6 +9,11 @@
 #include "ttmlir/Target/TTNN/Target.h"
 #include "ttmlir/Version.h"

+// References to the alignment byte values provided by tt-metal.
+constexpr static const std::uint32_t &REF_NOC_L1_ALIGNMENT = L1_ALIGNMENT;
+constexpr static const std::uint32_t &REF_PCIE_ALIGNMENT = PCIE_ALIGNMENT;
+constexpr static const std::uint32_t &REF_NOC_DRAM_ALIGNMENT = DRAM_ALIGNMENT;
+
 namespace tt::runtime::ttnn {
 static ::tt::target::Arch toFlatbuffer(::tt::ARCH arch) {
   switch (arch) {
@@ -41,7 +46,9 @@ std::pair getCurrentSystemDesc() {
   ::tt::target::Dim2d deviceGrid = toFlatbuffer(device.logical_grid_size());
   std::vector<::flatbuffers::Offset<tt::target::ChipDesc>> chipDescs = {
       ::tt::target::CreateChipDesc(fbb, toFlatbuffer(device.arch()),
-                                   &deviceGrid),
+                                   &deviceGrid, (1 << 20), 12, (1 << 20),
+                                   REF_NOC_L1_ALIGNMENT, REF_PCIE_ALIGNMENT,
+                                   REF_NOC_DRAM_ALIGNMENT),
   };
   std::vector<uint32_t> chipDescIndices = {
       0,
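Why these values matter: buffer base addresses handed to the NOC and PCIe DMA engines must fall on the reported byte boundaries, so allocators round addresses up to them. A minimal sketch of that round-up using the defaults serialized above (16 for NOC L1, 32 for PCIe and NOC DRAM); alignUp is a hypothetical helper, not something this diff adds:

    #include <cassert>
    #include <cstdint>

    // Round addr up to the next multiple of align, where align is a
    // power of two (true for the 16- and 32-byte values above).
    static std::uint64_t alignUp(std::uint64_t addr, std::uint64_t align) {
      assert(align != 0 && (align & (align - 1)) == 0);
      return (addr + align - 1) & ~(align - 1);
    }

    // With the defaults above:
    //   alignUp(1000, 16) == 1008  // NOC L1
    //   alignUp(1000, 32) == 1024  // PCIe / NOC DRAM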
diff --git a/test/ttmlir/Dialect/TTIR/test_allocate.mlir b/test/ttmlir/Dialect/TTIR/test_allocate.mlir
index 844e222bd8..34ac1771f6 100644
--- a/test/ttmlir/Dialect/TTIR/test_allocate.mlir
+++ b/test/ttmlir/Dialect/TTIR/test_allocate.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-layout --ttir-allocate %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
     // CHECK: %[[C:.*]] = "ttir.alloc"[[C:.*]]
     // CHECK-NOT: %[[C:.*]] = tensor.empty() : tensor<64x128xf32>
diff --git a/test/ttmlir/Dialect/TTIR/test_generic.mlir b/test/ttmlir/Dialect/TTIR/test_generic.mlir
index 44d8418302..ae51924992 100644
--- a/test/ttmlir/Dialect/TTIR/test_generic.mlir
+++ b/test/ttmlir/Dialect/TTIR/test_generic.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-generic %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
     %0 = tensor.empty() : tensor<64x128xf32>
     // CHECK: %[[C:.*]] = "ttir.generic"[[C:.*]]
diff --git a/test/ttmlir/Dialect/TTIR/test_grid_set.mlir b/test/ttmlir/Dialect/TTIR/test_grid_set.mlir
index 23d91384aa..2c2dc7c46d 100644
--- a/test/ttmlir/Dialect/TTIR/test_grid_set.mlir
+++ b/test/ttmlir/Dialect/TTIR/test_grid_set.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-layout --ttir-grid-set %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
     %0 = tensor.empty() : tensor<64x128xf32>
     // CHECK: #layout2 = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<8x16xf32, #l1_>>
diff --git a/test/ttmlir/Dialect/TTIR/test_layout.mlir b/test/ttmlir/Dialect/TTIR/test_layout.mlir
index 5e6485e008..c7cf49696c 100644
--- a/test/ttmlir/Dialect/TTIR/test_layout.mlir
+++ b/test/ttmlir/Dialect/TTIR/test_layout.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-layout %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<8x64x128xf32>, %arg1: tensor<8x64x128xf32>) -> tensor<8x64x128xf32> {
     %0 = tensor.empty() : tensor<8x64x128xf32>
     // CHECK: %[[C:.*]] = "ttir.layout"[[C:.*]]
diff --git a/test/ttmlir/Dialect/TTMetal/simple_multiply.mlir b/test/ttmlir/Dialect/TTMetal/simple_multiply.mlir
index 31421dc093..33f307d576 100644
--- a/test/ttmlir/Dialect/TTMetal/simple_multiply.mlir
+++ b/test/ttmlir/Dialect/TTMetal/simple_multiply.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-generic --ttir-layout --ttir-generic-region-operands-to-memref --ttir-allocate --convert-ttir-to-ttmetal %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
     // CHECK: %[[C:.*]] = "ttmetal.alloc"[[C:.*]]
     // CHECK: %[[C:.*]] = "ttmetal.host_write"[[C:.*]]
diff --git a/test/ttmlir/Dialect/TTNN/simple_matmul.mlir b/test/ttmlir/Dialect/TTNN/simple_matmul.mlir
index ea3dc6fc23..e786b015f1 100644
--- a/test/ttmlir/Dialect/TTNN/simple_matmul.mlir
+++ b/test/ttmlir/Dialect/TTNN/simple_matmul.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s | FileCheck %s
 #any_device_tile = #tt.operand_constraint<any_device_tile>
-module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x96xbf16>) -> tensor<64x96xbf16> {
     %0 = tensor.empty() : tensor<64x96xbf16>
     // CHECK: %[[C:.*]] = "ttnn.matmul"[[C:.*]]
diff --git a/test/ttmlir/Dialect/TTNN/simple_multiply.mlir b/test/ttmlir/Dialect/TTNN/simple_multiply.mlir
index 9dcdd4b26d..3e715eed91 100644
--- a/test/ttmlir/Dialect/TTNN/simple_multiply.mlir
+++ b/test/ttmlir/Dialect/TTNN/simple_multiply.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
     // CHECK: %[[C:.*]] = "ttnn.open_device"[[C:.*]]
     // CHECK: %[[C:.*]] = "ttnn.full"[[C:.*]]
diff --git a/test/ttmlir/Dialect/TTNN/simple_relu.mlir b/test/ttmlir/Dialect/TTNN/simple_relu.mlir
index 5c9d7643fe..307a181cbf 100644
--- a/test/ttmlir/Dialect/TTNN/simple_relu.mlir
+++ b/test/ttmlir/Dialect/TTNN/simple_relu.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<64x128xf32>) -> tensor<64x128xf32> {
     // CHECK: %[[C:.*]] = "ttnn.open_device"[[C:.*]]
     // CHECK: %[[C:.*]] = "ttnn.full"[[C:.*]]
diff --git a/test/ttmlir/Dialect/TTNN/simple_subtract.mlir b/test/ttmlir/Dialect/TTNN/simple_subtract.mlir
index da2d9f2ea0..801ac1d11a 100644
--- a/test/ttmlir/Dialect/TTNN/simple_subtract.mlir
+++ b/test/ttmlir/Dialect/TTNN/simple_subtract.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
     // CHECK: %[[C:.*]] = "ttnn.open_device"[[C:.*]]
     // CHECK: %[[C:.*]] = "ttnn.full"[[C:.*]]
diff --git a/test/ttmlir/Dialect/TTNN/simple_sum.mlir b/test/ttmlir/Dialect/TTNN/simple_sum.mlir
index d60c4f3d20..87ac5f4dcb 100644
--- a/test/ttmlir/Dialect/TTNN/simple_sum.mlir
+++ b/test/ttmlir/Dialect/TTNN/simple_sum.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x32xbf16> {
     // CHECK: %[[C:.*]] = "ttnn.open_device"[[C:.*]]
     // CHECK: %[[C:.*]] = "ttnn.full"[[C:.*]]
diff --git a/test/ttmlir/Dialect/TTNN/ttir_to_ttnn_pipeline.mlir b/test/ttmlir/Dialect/TTNN/ttir_to_ttnn_pipeline.mlir
index 5de440b656..913fc41e8a 100644
--- a/test/ttmlir/Dialect/TTNN/ttir_to_ttnn_pipeline.mlir
+++ b/test/ttmlir/Dialect/TTNN/ttir_to_ttnn_pipeline.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
     // CHECK: #layout2 = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<8x16xf32, #l1_>>
     // CHECK: %[[C:.*]] = "ttnn.open_device"[[C:.*]]
diff --git a/test/ttmlir/Dialect/TTNN/ttir_to_ttnn_pipeline_custom_opt.mlir b/test/ttmlir/Dialect/TTNN/ttir_to_ttnn_pipeline_custom_opt.mlir
index bc7935953b..886ef5f1d4 100644
--- a/test/ttmlir/Dialect/TTNN/ttir_to_ttnn_pipeline_custom_opt.mlir
+++ b/test/ttmlir/Dialect/TTNN/ttir_to_ttnn_pipeline_custom_opt.mlir
@@ -1,6 +1,6 @@
 // RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-grid-set=false" %s | FileCheck %s
 #any_device = #tt.operand_constraint<any_device>
-module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576}], [0], [], [<0, 0, 0, 0>]>} {
+module attributes {torch.debug_module_name = "_lambda", tt.system_desc = #tt.system_desc<[{arch = <wormhole_b0>, grid = <8x8>, l1_size = 1048576, num_dram_channels = 12, dram_channel_size = 1048576, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32}], [0], [], [<0, 0, 0, 0>]>} {
   func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
     // CHECK: #layout1 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>>
     // CHECK: %[[C:.*]] = "ttnn.open_device"[[C:.*]]
diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt
index 82edc9a61a..480f22b4b1 100644
--- a/third_party/CMakeLists.txt
+++ b/third_party/CMakeLists.txt
@@ -24,6 +24,7 @@ set(TTMETAL_INCLUDE_DIRS
   ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/tt_metal/hw/inc/${ARCH_EXTRA_DIR}
   ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/tt_metal/third_party/umd/src/firmware/riscv/${ARCH_NAME}
   ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/tt_eager
+  ${PROJECT_SOURCE_DIR}/third_party/tt-metal/tt_metal/hostdevcommon
   PARENT_SCOPE
 )
 set(TTMETAL_LIBRARY_DIR ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal-build/lib PARENT_SCOPE)
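Note on the build change: the added hostdevcommon include directory is what makes the L1_ALIGNMENT, PCIE_ALIGNMENT, and DRAM_ALIGNMENT symbols referenced in runtime/lib/ttnn/runtime.cpp resolvable. For reference, a sketch of the kind of definitions assumed to live in tt-metal's hostdevcommon headers; the authoritative declarations belong to tt-metal, and the 16/32/32 values here simply mirror the defaults used in TTOpsTypes.cpp above:

    // Illustrative only: assumed shape of the tt-metal constants that the
    // REF_* aliases in runtime.cpp bind to.
    #include <cstdint>

    constexpr static std::uint32_t L1_ALIGNMENT = 16;   // NOC L1
    constexpr static std::uint32_t PCIE_ALIGNMENT = 32; // PCIe DMA
    constexpr static std::uint32_t DRAM_ALIGNMENT = 32; // NOC DRAM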