Skip to content

Commit

Permalink
Add system desc attribute dram_unreserved_end (#540)
Browse files Browse the repository at this point in the history
Calculate the end of the DRAM region that is not usable by compiler.  This
upper region of memory is where kernel programs get allocated to.  This
calculation intends to estimate some conservative max number, but still
needs a mechanism to enforce during runtime #539.
  • Loading branch information
nsmithtt authored Sep 3, 2024
1 parent fd466fc commit 85c081f
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 12 deletions.
4 changes: 3 additions & 1 deletion include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def TT_ChipDescAttr : TT_Attr<"ChipDesc", "chip_desc"> {
"unsigned":$l1UnreservedBase,
"unsigned":$eriscL1UnreservedBase,
"unsigned":$dramUnreservedBase,
"unsigned":$dramUnreservedEnd,
"ChipPhysicalCoresAttr":$chipPhysicalCores,
ArrayRefParameter<"DataTypeAttr">:$supportedDataTypes,
ArrayRefParameter<"TileSizeAttr">:$supportedTileSizes);
Expand All @@ -121,13 +122,14 @@ def TT_ChipDescAttr : TT_Attr<"ChipDesc", "chip_desc"> {
`l1_unreserved_base` `=` $l1UnreservedBase `,`
`erisc_l1_unreserved_base` `=` $eriscL1UnreservedBase `,`
`dram_unreserved_base` `=` $dramUnreservedBase `,`
`dram_unreserved_end` `=` $dramUnreservedEnd `,`
`physical_cores` `=` $chipPhysicalCores `,`
`supported_data_types` `=` `[` $supportedDataTypes `]` `,`
`supported_tile_sizes` `=` `[` $supportedTileSizes `]` `}`}];

let extraClassDeclaration = [{
unsigned getUsableL1Size() const { return getL1Size() - getL1UnreservedBase(); }
unsigned getUsableDramChannelSize() const { return getDramChannelSize() - getDramUnreservedBase(); }
unsigned getUsableDramChannelSize() const { return getDramUnreservedEnd() - getDramUnreservedBase(); }
}];
}

Expand Down
1 change: 1 addition & 0 deletions include/ttmlir/Target/Common/types.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ table ChipDesc {
l1_unreserved_base: uint32;
erisc_l1_unreserved_base: uint32;
dram_unreserved_base: uint32;
dram_unreserved_end: uint32;
physical_cores: ChipPhysicalCores;
supported_data_types: [DataType];
supported_tile_sizes: [Dim2d];
Expand Down
1 change: 1 addition & 0 deletions include/ttmlir/Target/Utils/MLIRToFlatbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ toFlatbuffer(FlatbufferObjectCache &cache, ChipDescAttr chipDesc) {
chipDesc.getPcieAddressAlignBytes(),
chipDesc.getNocDRAMAddressAlignBytes(), chipDesc.getL1UnreservedBase(),
chipDesc.getEriscL1UnreservedBase(), chipDesc.getDramUnreservedBase(),
chipDesc.getDramUnreservedEnd(),
toFlatbuffer(cache, chipDesc.getChipPhysicalCores()),
toFlatbuffer(cache, chipDesc.getSupportedDataTypes()),
toFlatbuffer(cache, chipDesc.getSupportedTileSizes()));
Expand Down
6 changes: 3 additions & 3 deletions lib/CAPI/TTAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,14 @@ MlirAttribute ttmlirTTChipDescAttrGet(
unsigned nocL1AddressAlignBytes, unsigned pcieAddressAlignBytes,
unsigned nocDRAMAddressAlignBytes, unsigned l1UnreservedBase,
unsigned eriscL1UnreservedBase, unsigned dramUnreservedBase,
MlirAttribute chipPhysicalCores, MlirAttribute *supportedDataTypes,
MlirAttribute *supportedTileSizes) {
unsigned dramUnreservedEnd, MlirAttribute chipPhysicalCores,
MlirAttribute *supportedDataTypes, MlirAttribute *supportedTileSizes) {
std::vector<int64_t> gridVec(grid, grid + gridSize);
return wrap(ChipDescAttr::get(
unwrap(ctx), mlir::dyn_cast<ArchAttr>(unwrap(arch)), gridVec, l1Size,
numDramChannels, dramChannelSize, nocL1AddressAlignBytes,
pcieAddressAlignBytes, nocDRAMAddressAlignBytes, l1UnreservedBase,
eriscL1UnreservedBase, dramUnreservedBase,
eriscL1UnreservedBase, dramUnreservedBase, dramUnreservedEnd,
mlir::dyn_cast<ChipPhysicalCoresAttr>(unwrap(chipPhysicalCores)),
mlir::dyn_cast<DataTypeAttr>(unwrap(*supportedDataTypes)),
mlir::dyn_cast<TileSizeAttr>(unwrap(*supportedTileSizes))));
Expand Down
6 changes: 3 additions & 3 deletions lib/Dialect/TT/IR/TTOpsTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ mlir::tt::SystemDescAttr::getDefault(MLIRContext *context) {
{
tt::ChipDescAttr::get(
context, tt::ArchAttr::get(context, tt::Arch::WormholeB0),
gridShape, 1499136, 12, (1 << 30), 16, 32, 32, 0, 0, 0,
gridShape, 1499136, 12, (1 << 30), 16, 32, 32, 0, 0, 0, (1 << 30),
tt::ChipPhysicalCoresAttr::get(context, workerCores, dramCores,
{}, {}),
supported_data_types, supported_tile_sizes),
Expand Down Expand Up @@ -242,8 +242,8 @@ mlir::tt::SystemDescAttr::getFromPath(MLIRContext *context, std::string &path) {
element->pcie_address_align_bytes(),
element->noc_dram_address_align_bytes(), element->l1_unreserved_base(),
element->erisc_l1_unreserved_base(), element->dram_unreserved_base(),
chip_physical_cores_attr, supported_data_types_attr,
supported_tile_sizes_attr);
element->dram_unreserved_end(), chip_physical_cores_attr,
supported_data_types_attr, supported_tile_sizes_attr);
chip_desc_list.push_back(current_chip_desc_attr);
}

Expand Down
6 changes: 3 additions & 3 deletions python/TTModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,15 +124,15 @@ void populateTTModule(py::module &m) {
unsigned dramChannelSize, unsigned nocL1AddressAlignBytes,
unsigned pcieAddressAlignBytes, unsigned nocDRAMAddressAlignBytes,
unsigned l1UnreservedBase, unsigned eriscL1UnreservedBase,
unsigned dramUnreservedBase, MlirAttribute chipPhysicalCores,
MlirAttribute supportedDataTypes,
unsigned dramUnreservedBase, unsigned dramUnreservedEnd,
MlirAttribute chipPhysicalCores, MlirAttribute supportedDataTypes,
MlirAttribute supportedTileSizes) {
return wrap(tt::ChipDescAttr::get(
unwrap(ctx), mlir::cast<tt::ArchAttr>(unwrap(arch)), grid,
l1Size, numDramChannels, dramChannelSize,
nocL1AddressAlignBytes, pcieAddressAlignBytes,
nocDRAMAddressAlignBytes, l1UnreservedBase,
eriscL1UnreservedBase, dramUnreservedBase,
eriscL1UnreservedBase, dramUnreservedBase, dramUnreservedEnd,
mlir::dyn_cast<tt::ChipPhysicalCoresAttr>(
unwrap(chipPhysicalCores)),
mlir::cast<tt::DataTypeAttr>(unwrap(supportedDataTypes)),
Expand Down
31 changes: 29 additions & 2 deletions runtime/lib/common/system_desc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,31 @@ createChipPhysicalCores(const ::tt::tt_metal::Device *device,
fbb.CreateVectorOfStructs(eth_inactive_cores));
}

// Calculate the end of the DRAM region that is not usable by compiler. This
// upper region of memory is where kernel programs get allocated to. This
// function intends to estimate some conservative max number.
static std::uint32_t
calculateDRAMUnreservedEnd(const ::tt::tt_metal::Device *device) {
CoreCoord deviceGridSize = device->logical_grid_size();
CoreCoord dramGridSize = device->dram_grid_size();
std::uint32_t totalCores = deviceGridSize.x * deviceGridSize.y +
device->get_active_ethernet_cores().size();
std::uint32_t totalDramCores = dramGridSize.x * dramGridSize.y;
std::uint32_t programCarveOutPerCore = L1_UNRESERVED_BASE;
std::uint32_t totalProgramCarveOut = programCarveOutPerCore * totalCores;
// The total carve out can be interleaved between all dram channels
std::uint32_t programCarveOutDramSpace =
(totalProgramCarveOut + totalDramCores - 1) / totalDramCores;
static_assert(DRAM_ALIGNMENT > 0);
static_assert((DRAM_ALIGNMENT & (DRAM_ALIGNMENT - 1)) == 0);
assert(programCarveOutDramSpace < device->dram_size_per_channel());
std::uint32_t dramUnreservedEnd =
device->dram_size_per_channel() - programCarveOutDramSpace;
// Align to DRAM_ALIGNMENT
dramUnreservedEnd = dramUnreservedEnd & ~(DRAM_ALIGNMENT - 1);
return dramUnreservedEnd;
}

static std::unique_ptr<::tt::runtime::SystemDesc>
getCurrentSystemDescImpl(const ::tt::tt_metal::DeviceMesh &deviceMesh) {
std::vector<::tt::tt_metal::Device *> devices = deviceMesh.get_devices();
Expand Down Expand Up @@ -192,13 +217,15 @@ getCurrentSystemDescImpl(const ::tt::tt_metal::DeviceMesh &deviceMesh) {
auto supportedTileSizes =
fbb.CreateVectorOfStructs(supportedTileSizesVector);

auto dramUnreservedEnd = calculateDRAMUnreservedEnd(device);

chipDescs.push_back(::tt::target::CreateChipDesc(
fbb, toFlatbuffer(device->arch()), &deviceGrid,
device->l1_size_per_core(), device->num_dram_channels(),
device->dram_size_per_channel(), L1_ALIGNMENT, PCIE_ALIGNMENT,
DRAM_ALIGNMENT, L1_UNRESERVED_BASE, ERISC_L1_UNRESERVED_BASE,
DRAM_UNRESERVED_BASE, chipPhysicalCores, supportedDataTypes,
supportedTileSizes));
DRAM_UNRESERVED_BASE, dramUnreservedEnd, chipPhysicalCores,
supportedDataTypes, supportedTileSizes));
chipDescIndices.push_back(device->id());
// Derive chip capability
::tt::target::ChipCapability chipCapability =
Expand Down

0 comments on commit 85c081f

Please sign in to comment.