diff --git a/include/ttmlir/Dialect/TTKernel/IR/TTKernelOps.td b/include/ttmlir/Dialect/TTKernel/IR/TTKernelOps.td index 2c68cfe88a..4001f70343 100644 --- a/include/ttmlir/Dialect/TTKernel/IR/TTKernelOps.td +++ b/include/ttmlir/Dialect/TTKernel/IR/TTKernelOps.td @@ -625,6 +625,68 @@ def TTKernel_CastToL1PtrOp : TTKernel_Op<"reinterpret_cast { + let summary = "GetNocMulticastAddr"; + let description = [{ + GetNocMulticastAddr + }]; + + let arguments = (ins I32:$noc_x_start, I32:$noc_y_start, I32:$noc_x_end, I32:$noc_y_end, I32:$addr, Optional:$noc); + let results = (outs TTKernel_NocAddr:$mcastNocAddr); +} + +def TTKernel_NocAsyncWriteMulticastOnePacketOp : TTKernel_Op<"noc_async_write_multicast_one_packet"> { + let summary = "NocAsyncWriteMulticastOnePacket"; + let description = [{ + NocAsyncWriteMulticastOnePacket + this issues only a single packet with size <= NOC_MAX_BURST_SIZE (ie maximum packet size) + }]; + + let arguments = (ins I32:$srcLocalL1Addr, TTKernel_NocAddr:$dstNocAddrMulticast, I32:$size, I32:$num_dests, OptionalAttr:$linked, OptionalAttr:$multicast_path_reserve, Optional:$noc); +} + +def TTKernel_NocAsyncWriteMulticastOp : TTKernel_Op<"noc_async_write_multicast"> { + let summary = "NocAsyncWriteMulticast"; + let description = [{ + Initiates an asynchronous write from a source address in L1 memory on the + Tensix core executing this function call to a rectangular destination grid. + The destinations are specified using a uint64_t encoding referencing an + on-chip grid of nodes located at NOC coordinate range + (x_start,y_start,x_end,y_end) and a local address created using + *get_noc_multicast_addr* function. Also, *see noc_async_write_barrier*. + + The destination nodes can only be a set of Tensix cores + L1 memory address. + The destination nodes must form a rectangular grid. The destination L1 + memory address must be the same on all destination nodes. + + With this API, the multicast sender cannot be part of the multicast + destinations. If the multicast sender has to be in the multicast + destinations (i.e. must perform a local L1 write), the other API variant + *noc_async_write_multicast_loopback_src* can be used. + + Note: The number of destinations needs to be non-zero. Besides that, + there is no restriction on the number of destinations, i.e. the + multicast destinations can span the full chip. However, as mentioned + previously, the multicast source cannot be part of the destinations. So, the + maximum number of destinations is 119. + }]; + + let arguments = (ins I32:$srcLocalL1Addr, TTKernel_NocAddr:$dstNocAddrMulticast, I32:$size, I32:$num_dests, OptionalAttr:$linked, OptionalAttr:$multicast_path_reserve, Optional:$noc); +} + +def TTKernel_NocAsyncWriteMulticastLoopbackSrcOp : TTKernel_Op<"noc_async_write_multicast_loopback_src"> { + let summary = "NocAsyncWriteMulticastLoopbackSrc"; + let description = [{ + NocAsyncWriteMulticastLoopbackSrc + }]; + + let arguments = (ins I32:$srcLocalL1Addr, TTKernel_NocAddr:$dstNocAddrMulticast, I32:$size, I32:$num_dests, OptionalAttr:$linked, OptionalAttr:$multicast_path_reserve, Optional:$noc); +} + //===----------------------------------------------------------------------===// // TTKernel Misc operations //===----------------------------------------------------------------------===// diff --git a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp index bef8dcb764..0907bdc91f 100644 --- a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp +++ b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp @@ -247,7 +247,8 @@ class TTMetalToEmitCOpaqueRewriter : public OpConversionPattern { } else if constexpr (std::is_same_v) { SmallVector template_args; - template_args.push_back(emitc::OpaqueAttr::get(op.getContext(), "uint32_t")); + template_args.push_back( + emitc::OpaqueAttr::get(op.getContext(), "uint32_t")); return ArrayAttr::get(op.getContext(), template_args); } @@ -369,6 +370,12 @@ class ConvertTTKernelToEmitCPass TTMetalToEmitCOpaqueRewriter, TTMetalToEmitCOpaqueRewriter, TTMetalToEmitCOpaqueRewriter, + TTMetalToEmitCOpaqueRewriter, + TTMetalToEmitCOpaqueRewriter< + ttkernel::NocAsyncWriteMulticastOnePacketOp>, + TTMetalToEmitCOpaqueRewriter, + TTMetalToEmitCOpaqueRewriter< + ttkernel::NocAsyncWriteMulticastLoopbackSrcOp>, TTMetalToEmitCOpaqueRewriter, TTMetalToEmitCOpaqueRewriter, TTMetalToEmitCOpaqueRewriter,