-
Notifications
You must be signed in to change notification settings - Fork 658
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Reworked to do placement analysis on previous pass
- Loading branch information
Showing
10 changed files
with
179 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
compiler/src/iree/compiler/Dialect/Stream/Transforms/ExecutionPlacement.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Copyright 2025 The IREE Authors | ||
// | ||
// Licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#include "iree/compiler/Dialect/Stream/Analysis/Partitioning.h" | ||
#include "iree/compiler/Dialect/Stream/IR/StreamDialect.h" | ||
#include "iree/compiler/Dialect/Stream/IR/StreamOps.h" | ||
#include "iree/compiler/Dialect/Stream/IR/StreamTypes.h" | ||
#include "iree/compiler/Dialect/Stream/Transforms/Passes.h" | ||
#include "iree/compiler/Dialect/Util/IR/UtilDialect.h" | ||
#include "iree/compiler/Dialect/Util/IR/UtilOps.h" | ||
#include "iree/compiler/Dialect/Util/IR/UtilTypes.h" | ||
#include "llvm/ADT/EquivalenceClasses.h" | ||
#include "llvm/Support/Debug.h" | ||
#include "mlir/Analysis/TopologicalSortUtils.h" | ||
#include "mlir/Dialect/SCF/IR/SCF.h" | ||
#include "mlir/IR/Attributes.h" | ||
#include "mlir/IR/Builders.h" | ||
#include "mlir/IR/BuiltinOps.h" | ||
#include "mlir/IR/Dominance.h" | ||
#include "mlir/IR/IRMapping.h" | ||
#include "mlir/IR/Location.h" | ||
#include "mlir/IR/Matchers.h" | ||
#include "mlir/IR/PatternMatch.h" | ||
#include "mlir/Pass/Pass.h" | ||
#include "mlir/Pass/PassRegistry.h" | ||
#include "mlir/Transforms/GreedyPatternRewriteDriver.h" | ||
|
||
#define DEBUG_TYPE "iree-stream-execution-placement" | ||
|
||
namespace mlir::iree_compiler::IREE::Stream { | ||
|
||
#define GEN_PASS_DEF_EXECUTIONPLACEMENTPASS | ||
#include "iree/compiler/Dialect/Stream/Transforms/Passes.h.inc" | ||
|
||
namespace { | ||
|
||
struct ExecutionPlacementPass | ||
: public IREE::Stream::impl::ExecutionPlacementPassBase< | ||
ExecutionPlacementPass> { | ||
void runOnOperation() override { | ||
|
||
getOperation()->walk([](IREE::Stream::AsyncTransferOp transfer) { | ||
if (transfer.getExecAffinityAttr()) | ||
return; | ||
|
||
auto operand = transfer.getSource(); | ||
auto producer = operand.getDefiningOp(); | ||
auto streamable = | ||
dyn_cast_or_null<IREE::Stream::StreamableOpInterface>(producer); | ||
auto srcAffinity = dyn_cast<IREE::Stream::AffinityOpInterface>(producer); | ||
|
||
bool hasOneUse = operand.hasOneUse(); | ||
if (hasOneUse && streamable && srcAffinity) { | ||
transfer.setExecAffinityAttr(srcAffinity.getAffinityAttr()); | ||
} else { | ||
transfer.setExecAffinityAttr(transfer.getResultAffinityAttr()); | ||
} | ||
}); | ||
} | ||
}; | ||
|
||
} // namespace | ||
} // namespace mlir::iree_compiler::IREE::Stream |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
81 changes: 81 additions & 0 deletions
81
compiler/src/iree/compiler/Dialect/Stream/Transforms/test/execution_placement.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-stream-execution-placement))" %s | FileCheck %s | ||
|
||
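// Tests execution placement of stream.async.transfer ops across multiple | ||
// devices. It validates that each transfer is given an `on(...)` execution | ||
// affinity: the producer's affinity when the transferred value has a single | ||
// use, and the transfer's target affinity when the value has several uses. | ||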
|
||
// CHECK-LABEL: util.func public @deviceMultiDeviceSync | ||
// Two devices exchange dispatch results through transfers. None of the | ||
// transfers below carries an `on(...)` affinity in the input; the checks pin | ||
// the affinity the pass is expected to add to each one. | ||
util.func public @deviceMultiDeviceSync(%arg0: i1) -> !stream.resource<transient> { | ||
%c0 = arith.constant 0 : index | ||
%c1 = arith.constant 1 : index | ||
%c128 = arith.constant 128 : index | ||
%c255_i32 = arith.constant 255 : i32 | ||
|
||
%0 = stream.async.splat %c255_i32 : i32 -> !stream.resource<transient>{%c128} | ||
// CHECK: stream.async.dispatch | ||
%1 = stream.async.dispatch on(#hal.device.affinity<@device0>) @ex::@dispatch0[%c1, %c1, %c1](%0[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
%3 = stream.async.barrier %1 : !stream.resource<transient>{%c128} | ||
|
||
// %1 is consumed by both the barrier above and this transfer, so it has more | ||
// than one use and the transfer is expected on its target affinity (@device1). | ||
// CHECK: stream.async.transfer | ||
// CHECK-SAME: on(#hal.device.affinity<@device1>) | ||
%4 = stream.async.transfer %1 : !stream.resource<transient>{%c128} from(#hal.device.affinity<@device0>) -> to(#hal.device.affinity<@device1>) !stream.resource<transient>{%c128} | ||
// CHECK: stream.async.dispatch | ||
%2 = stream.async.dispatch on(#hal.device.affinity<@device1>) @ex::@dispatch1[%c1, %c1, %c1](%0[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
%5 = stream.async.barrier %2 : !stream.resource<transient>{%c128} | ||
|
||
// %2 likewise feeds a barrier and this transfer (two uses), so the transfer is | ||
// expected on its target affinity (@device0). | ||
// CHECK: stream.async.transfer | ||
// CHECK-SAME: on(#hal.device.affinity<@device0>) | ||
%6 = stream.async.transfer %2 : !stream.resource<transient>{%c128} from(#hal.device.affinity<@device1>) -> to(#hal.device.affinity<@device0>) !stream.resource<transient>{%c128} | ||
// CHECK: stream.async.dispatch | ||
%7 = stream.async.dispatch on(#hal.device.affinity<@device0>) @ex::@dispatch2[%c1, %c1, %c1](%3[%c0 to %c128 for %c128], %6[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}, !stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
%8 = stream.async.barrier %7 : !stream.resource<transient>{%c128} | ||
%9 = stream.async.dispatch on(#hal.device.affinity<@device1>) @ex::@dispatch2[%c1, %c1, %c1](%4[%c0 to %c128 for %c128], %5[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}, !stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
|
||
// %9 is used only by this transfer and is produced by a dispatch on @device1, | ||
// so the transfer is expected on the producer's affinity rather than on its | ||
// target (@device0). | ||
// CHECK: stream.async.transfer | ||
// CHECK-SAME: on(#hal.device.affinity<@device1>) | ||
%10 = stream.async.transfer %9 : !stream.resource<transient>{%c128} from(#hal.device.affinity<@device1>) -> to(#hal.device.affinity<@device0>) !stream.resource<transient>{%c128} | ||
// CHECK: stream.async.dispatch | ||
%11 = stream.async.dispatch on(#hal.device.affinity<@device0>) @ex::@dispatch2[%c1, %c1, %c1](%8[%c0 to %c128 for %c128], %10[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}, !stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
util.return %11 : !stream.resource<transient> | ||
} | ||
|
||
// ----- | ||
|
||
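// This one simulates multi-device synchronization between more than two | ||
// devices and checks the same placement rule: single-use dispatch results are | ||
// transferred on the producing device, multi-use results on the target device. | ||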
|
||
// CHECK-LABEL: @deviceTripleSync | ||
// Three devices: @device1 and @device2 each send a result to @device0, which | ||
// combines them and sends the combined result back to both. The checks pin the | ||
// execution affinity expected on each transfer. | ||
util.func public @deviceTripleSync(%arg0: i1) -> (!stream.resource<transient>, !stream.resource<transient>, !stream.resource<transient>) { | ||
%c0 = arith.constant 0 : index | ||
%c1 = arith.constant 1 : index | ||
%c128 = arith.constant 128 : index | ||
%c255_i32 = arith.constant 255 : i32 | ||
|
||
%0 = stream.async.splat %c255_i32 : i32 -> !stream.resource<transient>{%c128} | ||
%1 = stream.async.dispatch on(#hal.device.affinity<@device0>) @ex::@dispatch0[%c1, %c1, %c1](%0[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
%2 = stream.async.barrier %1 : !stream.resource<transient>{%c128} | ||
|
||
%3 = stream.async.dispatch on(#hal.device.affinity<@device1>) @ex::@dispatch0[%c1, %c1, %c1](%0[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
|
||
// %3 is used only by this transfer and is produced on @device1, so the | ||
// transfer is expected on the producer's affinity. | ||
// CHECK: stream.async.transfer | ||
// CHECK-SAME: on(#hal.device.affinity<@device1>) | ||
%4 = stream.async.transfer %3 : !stream.resource<transient>{%c128} from(#hal.device.affinity<@device1>) -> to(#hal.device.affinity<@device0>) !stream.resource<transient>{%c128} | ||
%5 = stream.async.dispatch on(#hal.device.affinity<@device2>) @ex::@dispatch0[%c1, %c1, %c1](%0[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
|
||
// %5 is used only by this transfer and is produced on @device2, so the | ||
// transfer is expected on the producer's affinity. | ||
// CHECK: stream.async.transfer | ||
// CHECK-SAME: on(#hal.device.affinity<@device2>) | ||
%6 = stream.async.transfer %5 : !stream.resource<transient>{%c128} from(#hal.device.affinity<@device2>) -> to(#hal.device.affinity<@device0>) !stream.resource<transient>{%c128} | ||
%7 = stream.async.dispatch on(#hal.device.affinity<@device0>) @ex::@dispatch2[%c1, %c1, %c1](%2[%c0 to %c128 for %c128], %4[%c0 to %c128 for %c128], %6[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}, !stream.resource<transient>{%c128}, !stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
%8 = stream.async.barrier %7 : !stream.resource<transient>{%c128} | ||
%11 = stream.async.dispatch on(#hal.device.affinity<@device0>) @ex::@dispatch0[%c1, %c1, %c1](%8[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
|
||
// %7 has three users (the barrier above and both transfers below), so each | ||
// transfer of %7 is expected on its own target affinity: @device1 here. | ||
// CHECK: stream.async.transfer | ||
// CHECK-SAME: on(#hal.device.affinity<@device1>) | ||
%9 = stream.async.transfer %7 : !stream.resource<transient>{%c128} from(#hal.device.affinity<@device0>) -> to(#hal.device.affinity<@device1>) !stream.resource<transient>{%c128} | ||
%12 = stream.async.dispatch on(#hal.device.affinity<@device1>) @ex::@dispatch0[%c1, %c1, %c1](%9[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
|
||
// Same multi-use source %7; this transfer targets @device2. | ||
// CHECK: stream.async.transfer | ||
// CHECK-SAME: on(#hal.device.affinity<@device2>) | ||
%10 = stream.async.transfer %7 : !stream.resource<transient>{%c128} from(#hal.device.affinity<@device0>) -> to(#hal.device.affinity<@device2>) !stream.resource<transient>{%c128} | ||
%13 = stream.async.dispatch on(#hal.device.affinity<@device2>) @ex::@dispatch0[%c1, %c1, %c1](%10[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128} | ||
util.return %11, %12, %13 : !stream.resource<transient>, !stream.resource<transient>, !stream.resource<transient> | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters