Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support dram memory space in metal direct #584

Merged
merged 1 commit into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions include/ttmlir/Dialect/TT/IR/TTOpsTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,25 @@
#include "ttmlir/Dialect/TT/IR/TTOpsEnums.h.inc"

namespace mlir::tt {
/// Named positions for the results of a physical-grid mapping.
///
/// Code that consumes such a mapping indexes the composed coordinate vector
/// with these constants instead of bare 0/1/2, and checks the vector length
/// against `NumIndices`.
struct PhysGridResultIdx {
  enum : int64_t {
    DeviceIdx = 0, // first result: device index
    CoreCoordY,    // second result: core y coordinate
    CoreCoordX,    // third result: core x coordinate
    NumIndices,    // total number of results (keep last)
  };
};

/// Named positions for the results of a memory map.
///
/// The first three positions match `PhysGridResultIdx`; a fourth position
/// carries the shard offset.
/// NOTE(review): presumably these index the results of the device's
/// l1Map / dramMap affine maps -- confirm against the users of this struct.
struct MemoryMapResultIdx {
  enum : int64_t {
    DeviceIdx = 0, // first result: device index
    CoreCoordY,    // second result: core y coordinate
    CoreCoordX,    // third result: core x coordinate
    ShardOffset,   // fourth result: offset within the shard
    NumIndices,    // total number of results (keep last)
  };
};

inline bool isSystemMemorySpace(MemorySpace memorySpace) {
return memorySpace == MemorySpace::System ||
memorySpace == MemorySpace::SystemMMIO;
Expand Down
29 changes: 22 additions & 7 deletions include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def TT_LayoutAttr : TT_Attr<"Layout", "layout"> {
llvm::SmallVector<int64_t> getStride(ArrayRef<int64_t> logicalShape) const;
llvm::SmallVector<int64_t> getPhysicalShape(ArrayRef<int64_t> logicalShape) const;
llvm::SmallVector<int64_t> getShardShape() const;
AffineMap projectOnto(AffineMap linearMap, ArrayRef<int64_t> logicalTensorShape, GridAttr grid) const;
AffineMap projectOnto(AffineMap linearMap, AffineMap physicalMemoryMap, ArrayRef<int64_t> logicalTensorShape) const;
AffineMap getIdentityTileLinearMap() const;
llvm::SmallVector<int64_t> getTiledShape(ArrayRef<int64_t> logicalTensorShape) const;
}];
Expand Down Expand Up @@ -321,18 +321,33 @@ def TT_BufferAttr : TT_Attr<"Buffer", "buffer", []> {
}

def TT_DeviceAttr : TT_Attr<"Device", "device", []> {
let summary = "Device attribute in TT dialect";
let summary = "Device attribute in TT dialect.";
let description = [{
Describes the physical layout of a device in the system and is made up of a few components:
- A grid attribute that describes the device's compute grid shape. It not only describes the shape of the compute grid, but also
carries an affine map that describes how the logical grid maps to the physical grid.
- Two affine maps that describe how a tensor layout's linear attribute maps to the L1 and DRAM memory spaces.
- An array of chip ids that this device is made up of.
}];
let parameters = (ins TT_GridAttr:$grid, ArrayRefParameter<"unsigned">:$chipIds);
let assemblyFormat = "`<` qualified($grid) `,` `[` $chipIds `]` `>`";
let parameters = (ins TT_GridAttr:$workerGrid,
"AffineMap":$l1Map,
"AffineMap":$dramMap,
ArrayRefParameter<"unsigned">:$chipIds);
let assemblyFormat = "`<` `workerGrid` `=` qualified($workerGrid) `,` `l1Map` `=` qualified($l1Map) `,` `dramMap` `=` qualified($dramMap) `,` `chipIds` `=` `[` $chipIds `]` `>`";

let extraClassDeclaration = [{
static DeviceAttr get(::mlir::MLIRContext *context, ArrayRef<int64_t> shape, AffineMap physicalGridMapping, ArrayRef<unsigned> chipIds) {
return DeviceAttr::get(context, GridAttr::get(context, shape, physicalGridMapping), chipIds);
}
static DeviceAttr get(::mlir::MLIRContext *context, SystemDescAttr systemDesc, ArrayRef<unsigned> chipIds);
static DeviceAttr get(::mlir::MLIRContext *context, SystemDescAttr systemDesc, bool enableMultichip = false);
/// Selects the device affine map that corresponds to `memorySpace`:
/// the L1 map for `DeviceL1`, the DRAM map for `DeviceDRAM`.
/// Any other memory space is a programming error and hits
/// `llvm_unreachable`.
AffineMap getMapForMemorySpace(MemorySpace memorySpace) const {
  if (memorySpace == MemorySpace::DeviceL1) {
    return getL1Map();
  }
  if (memorySpace == MemorySpace::DeviceDRAM) {
    return getDramMap();
  }
  llvm_unreachable("Unsupported memory space");
}
}];

let genVerifyDecl = 1;
Expand Down
79 changes: 67 additions & 12 deletions include/ttmlir/Dialect/TT/Utils/PhysicalCoreCoord.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,20 @@ struct PhysicalCoreCoord {

// Mutable access to one coordinate component by position:
// 0 -> d, 1 -> y, 2 -> x. Indices >= 3 trip the assert.
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped. The ternary `return` is presumably the removed line and the
// `switch` its replacement; as written here the switch is unreachable.
// Confirm against the merged file before relying on either form.
std::int64_t &operator[](std::size_t i) {
assert(i < 3);
return i == 0 ? d : i == 1 ? y : x;
switch (i) {
case 0:
return d;
case 1:
return y;
case 2:
return x;
default:
llvm_unreachable("invalid index");
}
}

// Read-only access to one coordinate component by position
// (0 -> d, 1 -> y, 2 -> x), returned by value.
// NOTE(review): two `return` statements appear back to back, which suggests
// removed and added diff lines were merged without markers. The last one
// forwards to the non-const overload via const_cast; the result is returned
// by value, so the caller cannot modify the object through it. Confirm which
// lines survive in the merged file.
std::int64_t operator[](std::size_t i) const {
assert(i < 3);
return i == 0 ? d : i == 1 ? y : x;
return (*const_cast<PhysicalCoreCoord *>(this))[i];
}

bool operator==(PhysicalCoreCoord const &other) const {
Expand All @@ -36,32 +44,79 @@ struct PhysicalCoreCoord {

class PhysicalCoreCoordMapping {
public:
// NOTE(review): this span interleaves what looks like the removed
// constructor (taking only `chipDescs`, filling `workers`) with the added
// static factory `getWorkerMapping` (taking `chipIds` + `chipDescs`, filling
// `physCores`); the diff markers were lost. The comments below describe the
// factory form -- confirm against the merged file.
//
// getWorkerMapping: collects the physical (y, x) coordinates of every worker
// core of the chips listed in `chipIds`, in chip order, and wraps them in a
// PhysicalCoreCoordMapping whose grid is the 2D grid of the first listed chip.
// Asserts that every chip has the same grid and that each chip's worker count
// equals grid[0] * grid[1].
PhysicalCoreCoordMapping(ArrayRef<tt::ChipDescAttr> chipDescs) {
ArrayRef<int64_t> firstChipGrid = chipDescs.front().getGrid();
static PhysicalCoreCoordMapping
getWorkerMapping(ArrayRef<unsigned> chipIds,
ArrayRef<tt::ChipDescAttr> chipDescs) {
SmallVector<std::array<int64_t, 2>> physCores;
// `chipIds` entries are used as indices into `chipDescs`.
ArrayRef<int64_t> firstChipGrid = chipDescs[chipIds.front()].getGrid();
assert(firstChipGrid.size() == 2);
grid = {firstChipGrid[0], firstChipGrid[1]};
std::array<int64_t, 2> grid = {firstChipGrid[0], firstChipGrid[1]};

workers.reserve(chipDescs.size() * grid[0] * grid[1]);
for (auto chipDesc : chipDescs) {
physCores.reserve(chipIds.size() * grid[0] * grid[1]);
for (auto chipId : chipIds) {
auto chipDesc = chipDescs[chipId];
auto chipGrid = chipDesc.getGrid();
// All chips must share the first chip's grid shape.
assert(chipGrid == firstChipGrid);
ChipPhysicalCoresAttr chipPhysicalCores = chipDesc.getChipPhysicalCores();
assert(chipPhysicalCores.getWorker().size() ==
static_cast<size_t>(grid[0] * grid[1]));
for (auto worker : chipPhysicalCores.getWorker()) {
workers.push_back({worker.getY(), worker.getX()});
physCores.push_back({worker.getY(), worker.getX()});
}
}
assert(workers.size() == chipDescs.size() * grid[0] * grid[1]);
assert(physCores.size() == chipIds.size() * grid[0] * grid[1]);
return PhysicalCoreCoordMapping(grid, physCores);
}

/// Builds the mapping for the DRAM cores of the chips listed in `chipIds`
/// (each id is used as an index into `chipDescs`).
///
/// The grid is 1 x N, where N is the number of DRAM cores reported by the
/// first listed chip; every listed chip is asserted to report the same
/// count. Physical (y, x) coordinates are appended chip by chip, in the
/// order the DRAM cores are listed.
static PhysicalCoreCoordMapping
getDramMapping(ArrayRef<unsigned> chipIds,
               ArrayRef<tt::ChipDescAttr> chipDescs) {
  ArrayRef<CoreCoordAttr> referenceDram =
      chipDescs[chipIds.front()].getChipPhysicalCores().getDram();
  std::array<int64_t, 2> dramGrid = {
      1, static_cast<int64_t>(referenceDram.size())};

  const size_t coresPerChip = static_cast<size_t>(dramGrid[0] * dramGrid[1]);
  const size_t expectedTotal = chipIds.size() * coresPerChip;

  SmallVector<std::array<int64_t, 2>> dramCoords;
  dramCoords.reserve(expectedTotal);
  for (auto id : chipIds) {
    ChipPhysicalCoresAttr cores = chipDescs[id].getChipPhysicalCores();
    ArrayRef<CoreCoordAttr> chipDram = cores.getDram();
    // Every chip must expose the same number of DRAM cores as the first.
    assert(chipDram.size() == coresPerChip);
    for (auto core : chipDram) {
      dramCoords.push_back({core.getY(), core.getX()});
    }
  }
  assert(dramCoords.size() == expectedTotal);
  return PhysicalCoreCoordMapping(dramGrid, dramCoords);
}

/// Dispatches to the mapping builder that matches `memorySpace`:
/// worker cores for `DeviceL1`, DRAM cores for `DeviceDRAM`.
/// Any other memory space is a programming error and hits
/// `llvm_unreachable`.
static PhysicalCoreCoordMapping
getMemorySpaceMapping(ArrayRef<unsigned> chipIds,
                      ArrayRef<tt::ChipDescAttr> chipDescs,
                      MemorySpace memorySpace) {
  if (memorySpace == MemorySpace::DeviceL1) {
    return getWorkerMapping(chipIds, chipDescs);
  }
  if (memorySpace == MemorySpace::DeviceDRAM) {
    return getDramMapping(chipIds, chipDescs);
  }
  llvm_unreachable("unsupported memory space");
}

// Looks up the stored physical {y, x} pair for a (d, y, x) coordinate.
// The flat index is d * (grid[0] * grid[1]) + y * grid[1] + x, i.e. one
// block of grid[0] * grid[1] entries per device, each block laid out with
// x varying fastest. No bounds check is performed here.
// NOTE(review): the two `return` lines differ only in the container name
// (`workers` vs `physCores`); this looks like a removed/added diff pair with
// the markers stripped -- confirm which one the merged file keeps.
std::array<int64_t, 2> operator[](PhysicalCoreCoord coord) const {
return workers[coord.d * grid[0] * grid[1] + coord.y * grid[1] + coord.x];
return physCores[coord.d * grid[0] * grid[1] + coord.y * grid[1] + coord.x];
}

private:
// Private constructor used by the static factory functions: stores the grid
// shape and the flat list of physical {y, x} core coordinates.
// NOTE(review): `physCores` is taken by value and then copied into the
// member; `physCores(std::move(physCores))` would avoid the second copy
// (needs <utility> to be included by this header).
PhysicalCoreCoordMapping(std::array<int64_t, 2> grid,
SmallVector<std::array<int64_t, 2>> physCores)
: grid(grid), physCores(physCores) {}

private:
std::array<int64_t, 2> grid;
SmallVector<std::array<int64_t, 2>> workers;
SmallVector<std::array<int64_t, 2>> physCores;
};
} // namespace mlir::tt

Expand Down
16 changes: 10 additions & 6 deletions include/ttmlir/Target/Utils/MLIRToFlatbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,19 +289,23 @@ toFlatbuffer(FlatbufferObjectCache &cache, GridAttr tensorGrid,
::ttmlir::utils::sample(
tensorGridShape, [&](ArrayRef<std::int64_t> virtualCoreCoord) {
SmallVector<std::int64_t> coreCoord = mapping.compose(virtualCoreCoord);
assert(coreCoord.size() == 3 && "expected a 2D core");
assert(coreCoord[0] == 0 && "expected single device");
assert(coreCoord.size() == PhysGridResultIdx::NumIndices &&
"expected a 2D core");
assert(coreCoord[PhysGridResultIdx::DeviceIdx] == 0 &&
"expected single device");
if (!coreRangeSet.empty() &&
((coreRangeSet.back().loc().y() == coreCoord[1]) &&
((coreRangeSet.back().loc().y() ==
coreCoord[PhysGridResultIdx::CoreCoordY]) &&
(coreRangeSet.back().loc().x() + coreRangeSet.back().size().x()) ==
coreCoord[2])) {
coreCoord[PhysGridResultIdx::CoreCoordX])) {
coreRangeSet.back() = ::tt::target::Dim2dRange(
coreRangeSet.back().loc(),
::tt::target::Dim2d(coreRangeSet.back().size().y(),
coreRangeSet.back().size().x() + 1));
} else {
coreRangeSet.push_back(::tt::target::Dim2dRange(
::tt::target::Dim2d(coreCoord[1], coreCoord[2]),
::tt::target::Dim2d(coreCoord[PhysGridResultIdx::CoreCoordY],
coreCoord[PhysGridResultIdx::CoreCoordX]),
::tt::target::Dim2d(1, 1)));
}
if (coreRangeSet.size() > 1 &&
Expand Down Expand Up @@ -401,7 +405,7 @@ layoutAttrToFlatbuffer(FlatbufferObjectCache &cache, Attribute attr,
auto strideInt64 = layoutAttr.getStride(logicalShape);
std::vector<int32_t> stride(strideInt64.begin(), strideInt64.end());
auto coreRangeSet =
toFlatbuffer(cache, layoutAttr.getGrid(), deviceAttr.getGrid());
toFlatbuffer(cache, layoutAttr.getGrid(), deviceAttr.getWorkerGrid());
return ::tt::target::CreateLayoutDescDirect(
*cache.fbb, &stride, toFlatbuffer(cache, layoutAttr.getOobVal()),
&coreRangeSet,
Expand Down
4 changes: 4 additions & 0 deletions include/ttmlir/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ template <typename T> T alignUp(T ptr, T alignment) {
return (ptr + alignment - 1) & ~(alignment - 1);
}

/// Rounds `ptr` down by clearing the bits selected by `alignment - 1`.
///
/// This yields the largest multiple of `alignment` that is <= `ptr` only
/// when `alignment` is a power of two; other alignments are not checked.
template <typename T> T alignDown(T ptr, T alignment) {
  const T lowBitsMask = alignment - 1;
  return ptr & ~lowBitsMask;
}

template <typename Vector, typename Fn>
inline void sample(Vector const &shape, Fn fn) {
llvm::SmallVector<std::int64_t, 8> strides(shape.size());
Expand Down
Loading
Loading