diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp index 017e81c74513..49e213d2c54f 100644 --- a/compiler/plugins/target/ROCM/ROCMTarget.cpp +++ b/compiler/plugins/target/ROCM/ROCMTarget.cpp @@ -60,14 +60,12 @@ namespace mlir::iree_compiler::IREE::HAL { namespace { // TODO(#18792): rename flags back to iree-rocm- as they are not HIP-specific. -// Only iree-hip-legacy-sync applies uniquely to HIP. struct ROCMOptions { std::string target = ""; std::string targetFeatures = ""; std::string bitcodeDirectory = getDefaultBitcodeDirectory(); int wavesPerEu = 0; std::string enableROCMUkernels = "none"; - bool legacySync = true; bool slpVectorization = true; bool globalISel = false; @@ -107,9 +105,7 @@ struct ROCMOptions { cl::desc("Enables microkernels in the HIP compiler backend. May be " "`default`, `none`, `all`, or a comma-separated list of " "specific unprefixed microkernels to enable, e.g. `mmt4d`.")); - binder.opt("iree-hip-legacy-sync", legacySync, cl::cat(category), - cl::desc("Enables 'legacy-sync' mode, which is required " - "for inline execution.")); + binder.list( "iree-hip-pass-plugin-path", passPlugins, cl::desc("LLVM pass plugins are out of tree libraries that implement " @@ -876,12 +872,6 @@ class HIPTargetDevice final : public TargetDevice { Builder b(context); SmallVector deviceConfigAttrs; - if (options.legacySync) { - // Indicates that the runtime HAL driver operates only in the legacy - // synchronous mode. - deviceConfigAttrs.emplace_back(b.getStringAttr("legacy_sync"), - b.getUnitAttr()); - } auto deviceConfigAttr = b.getDictionaryAttr(deviceConfigAttrs); SmallVector executableConfigAttrs; diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/BUILD.bazel b/compiler/src/iree/compiler/Dialect/HAL/Transforms/BUILD.bazel index b39f541051c8..e0354e004ad7 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/BUILD.bazel +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/BUILD.bazel @@ -23,7 +23,6 @@ iree_compiler_cc_library( "DumpExecutableBenchmarks.cpp", "DumpExecutableSources.cpp", "ElideRedundantCommands.cpp", - "FixupLegacySync.cpp", "HoistExecutableObjects.cpp", "InitializeDevices.cpp", "InlineMemoizeRegions.cpp", diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt index 7dccc49f88f3..d66a471bc7c6 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/CMakeLists.txt @@ -24,7 +24,6 @@ iree_cc_library( "DumpExecutableBenchmarks.cpp" "DumpExecutableSources.cpp" "ElideRedundantCommands.cpp" - "FixupLegacySync.cpp" "HoistExecutableObjects.cpp" "InitializeDevices.cpp" "InlineMemoizeRegions.cpp" diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/FixupLegacySync.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/FixupLegacySync.cpp deleted file mode 100644 index 7f6a18e93453..000000000000 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/FixupLegacySync.cpp +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2022 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "iree/compiler/Dialect/HAL/Analysis/DeviceAnalysis.h" -#include "iree/compiler/Dialect/HAL/IR/HALDialect.h" -#include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "iree/compiler/Dialect/HAL/Transforms/Passes.h" -#include "iree/compiler/Dialect/Util/IR/UtilDialect.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/IR/Attributes.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinTypes.h" -#include "mlir/IR/Diagnostics.h" -#include "mlir/Pass/Pass.h" - -namespace mlir::iree_compiler::IREE::HAL { - -#define GEN_PASS_DEF_FIXUPLEGACYSYNCPASS -#include "iree/compiler/Dialect/HAL/Transforms/Passes.h.inc" - -namespace { - -// Marks a command buffer as being executable inline during recording. -// This is only possible because we generate our command buffer code without -// caching today and know that all are executable inline so long as we have -// blocking queue operations. As soon as we memoize command buffers this will be -// invalid. -static void makeAllowInlineExecution(IREE::HAL::CommandBufferCreateOp op) { - auto modes = op.getModes(); - if (bitEnumContainsAll(modes, - IREE::HAL::CommandBufferModeBitfield::OneShot)) { - op.setModesAttr(IREE::HAL::CommandBufferModeBitfieldAttr::get( - op.getContext(), - modes | IREE::HAL::CommandBufferModeBitfield::AllowInlineExecution)); - } -} - -// Scans backward/forward from |asyncOp| and converts it to blocking form by -// waiting on the wait fences and signal fences if needed. -// We allow any number of non-side-effecting ops to exist between the search -// point and where the waits will be as often times arith ops end up scattered -// around. -// -// Example: -// hal.fence.await until([%wait_fence]) // existing -// // no wait inserted on %wait_fence as present preceeding: -// hal.device.queue.execute wait(%wait_fence) signal(%signal_fence) -// // no wait inserted on %signal_fence as present following: -// hal.fence.await until([%signal_fence]) // existing -static void insertWaitIfNeeded(Operation *asyncOp, - MutableOperandRange waitFence, - Value signalFence) { - assert(waitFence.size() == 1 && "one wait fence expected"); - auto loc = asyncOp->getLoc(); - - // Returns true if waits can be reordered across |op|. - auto isSafeToReorder = [&](Operation &op) { - // For now we just ignore arith ops and constants. - // I hope we can delete this pass before we need more :) - return op.hasTrait() || - op.getDialect()->getNamespace() == "arith"; - }; - - // Returns an operation waiting on |fence| that is guaranteed to have - // executed prior to asyncOp. Returns null if no waits found. - auto beginIt = std::prev(asyncOp->getBlock()->begin()); - auto endIt = std::prev(asyncOp->getBlock()->end()); // ignore terminator - auto findPrecedingAwait = [&](Value fence) -> Operation * { - auto it = std::prev(Block::iterator(asyncOp)); - for (; it != beginIt; --it) { - if (auto awaitOp = dyn_cast(it)) { - if (llvm::is_contained(awaitOp.getFences(), fence)) { - // Wait is for the fence, found! - return &*it; - } else { - // Keep scanning - generally waiting on one fence is enough. - continue; - } - } else if (!isSafeToReorder(*it)) { - break; // hit a point we can't scan past - } - } - return nullptr; - }; - - // Returns an operation waiting on |fence| that is guaranteed to be - // executed after asyncOp. Returns null if no waits found. - auto findSucceedingAwait = [&](Value fence) -> Operation * { - auto it = std::next(Block::iterator(asyncOp)); - for (; it != endIt; ++it) { - if (auto awaitOp = dyn_cast(it)) { - if (llvm::is_contained(awaitOp.getFences(), fence)) { - // Wait is for the fence, found! - return &*it; - } else { - // Keep scanning - generally waiting on one fence is enough. - continue; - } - } else if (!isSafeToReorder(*it)) { - break; // hit a point we can't scan past - } - } - return nullptr; - }; - - OpBuilder builder(asyncOp); - Value timeoutMillis; - auto makeInfiniteTimeout = [&]() { - if (timeoutMillis) - return timeoutMillis; - timeoutMillis = builder.create(loc, -1, 32); - return timeoutMillis; - }; - - // Scan backward to see if the wait fences have been signaled already. - // Since we walk the regions forward we will likely have a wait from the - // producer already. - auto *precedingAwait = findPrecedingAwait(waitFence[0].get()); - if (!precedingAwait) { - builder.create( - loc, builder.getI32Type(), makeInfiniteTimeout(), waitFence[0].get()); - } - if (!isa_and_nonnull( - waitFence[0].get().getDefiningOp())) { - // Neuter wait because it's either covered (we found a preceding await) or - // we just inserted one. - Value nullFence = builder.create( - loc, builder.getType()); - waitFence.assign(nullFence); - } - - // Scan forward to see if the signal fences are waited on already. - auto *succeedingAwait = findSucceedingAwait(signalFence); - if (!succeedingAwait) { - builder.setInsertionPointAfter(asyncOp); - builder.create(loc, builder.getI32Type(), - makeInfiniteTimeout(), signalFence); - } -} - -//===----------------------------------------------------------------------===// -// --iree-hal-fixup-legacy-sync -//===----------------------------------------------------------------------===// - -struct FixupLegacySyncPass - : public IREE::HAL::impl::FixupLegacySyncPassBase { - void runOnOperation() override { - auto moduleOp = getOperation(); - - // Analyze the module to determine which devices need the behavior. - DeviceAnalysis deviceAnalysis(moduleOp); - if (failed(deviceAnalysis.run())) - return signalPassFailure(); - auto isLegacySync = [&](Value deviceValue) { - auto deviceSet = deviceAnalysis.lookupDeviceTargets(deviceValue); - return deviceSet.has_value() ? deviceSet->hasConfigAttrAny("legacy_sync") - : false; - }; - - // This could use an interface but it'd be better to remove the need for - // this pass instead. - for (auto funcOp : moduleOp.getOps()) { - funcOp.walk([&](Operation *op) { - TypeSwitch(op) - .Case([&](IREE::HAL::CommandBufferCreateOp op) { - if (isLegacySync(op.getDevice())) { - makeAllowInlineExecution(op); - } - }) - .Case([&](IREE::HAL::DeviceQueueAllocaOp op) { - if (isLegacySync(op.getDevice())) { - insertWaitIfNeeded(op, op.getWaitFenceMutable(), - op.getSignalFence()); - } - }) - .Case([&](IREE::HAL::DeviceQueueDeallocaOp op) { - if (isLegacySync(op.getDevice())) { - insertWaitIfNeeded(op, op.getWaitFenceMutable(), - op.getSignalFence()); - } - }) - .Case([&](IREE::HAL::DeviceQueueReadOp op) { - if (isLegacySync(op.getDevice())) { - insertWaitIfNeeded(op, op.getWaitFenceMutable(), - op.getSignalFence()); - } - }) - .Case([&](IREE::HAL::DeviceQueueWriteOp op) { - if (isLegacySync(op.getDevice())) { - insertWaitIfNeeded(op, op.getWaitFenceMutable(), - op.getSignalFence()); - } - }) - .Case([&](IREE::HAL::DeviceQueueExecuteOp op) { - if (isLegacySync(op.getDevice())) { - insertWaitIfNeeded(op, op.getWaitFenceMutable(), - op.getSignalFence()); - } - }); - }); - } - } -}; - -} // namespace - -} // namespace mlir::iree_compiler::IREE::HAL diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp index 66d9e2c7c3f6..a9efbf33546f 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp @@ -466,10 +466,6 @@ void buildHALTransformPassPipeline(OpPassManager &passManager, passManager.addPass(IREE::HAL::createOutlineMemoizeRegionsPass()); } - // If any devices require the legacy synchronous execution behavior then - // make all async operations blocking. - passManager.addPass(IREE::HAL::createFixupLegacySyncPass()); - // Prune unused executables and their contents. passManager.addPass(IREE::HAL::createPruneExecutablesPass()); diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td index 20d172d19ff3..2f64dac24883 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td @@ -187,24 +187,6 @@ def VerifyDevicesPass : ]; } -def FixupLegacySyncPass : - Pass<"iree-hal-fixup-legacy-sync", "mlir::ModuleOp"> { - let summary = "Applies fixups to the program for when using legacy HAL devices."; - let description = [{ - Applies fixups to the program for when using legacy HAL devices that only - support synchronous execution. Once all devices support async this will be - removed. - - NOTE: this pass only exists for backwards compatibility with legacy HAL - drivers. It will be removed once all have migrated to the modern async APIs. - }]; - let dependentDialects = [ - "mlir::arith::ArithDialect", - "IREE::HAL::HALDialect", - "IREE::Util::UtilDialect", - ]; -} - def InlineMemoizeRegionsPass : Pass<"iree-hal-inline-memoize-regions", ""> { let summary = "Inlines `hal.device.memoize` regions into their parent region."; diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/BUILD.bazel b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/BUILD.bazel index aa2130b86114..99697293cb68 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/BUILD.bazel +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/BUILD.bazel @@ -23,7 +23,6 @@ iree_lit_test_suite( "dump_executable_benchmarks.mlir", "dump_executable_sources.mlir", "elide_redundant_commands.mlir", - "fixup_legacy_sync.mlir", "hoist_executable_objects.mlir", "initialize_devices.mlir", "inline_memoize_regions.mlir", diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt index cd20f385ef88..ff7e79c73789 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/CMakeLists.txt @@ -21,7 +21,6 @@ iree_lit_test_suite( "dump_executable_benchmarks.mlir" "dump_executable_sources.mlir" "elide_redundant_commands.mlir" - "fixup_legacy_sync.mlir" "hoist_executable_objects.mlir" "initialize_devices.mlir" "inline_memoize_regions.mlir" diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/fixup_legacy_sync.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/fixup_legacy_sync.mlir deleted file mode 100644 index d217b4784924..000000000000 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/fixup_legacy_sync.mlir +++ /dev/null @@ -1,190 +0,0 @@ -// RUN: iree-opt --split-input-file --iree-hal-fixup-legacy-sync %s | FileCheck %s - -// TODO(multi-device): remove once device globals are used. This is a fallback -// path during the transition. -module attributes { - hal.device.targets = [ - #hal.device.target<"vulkan", {legacy_sync}> : !hal.device - ] -} { -// CHECK-LABEL: @default_device_targets -// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[AFFINITY:.+]]: i64) -util.func public @default_device_targets(%device: !hal.device, %affinity: i64) { - // CHECK: hal.command_buffer.create device(%[[DEVICE]] : !hal.device) mode("None") - %cmd = hal.command_buffer.create device(%device : !hal.device) mode("None") categories("Transfer|Dispatch") affinity(%affinity) : !hal.command_buffer - util.return -} -} // module - -// ----- - -// Tests that unknown devices (here passed as an arg on a public function) -// don't trigger the pass, as we default to non-legacy behavior. - -// CHECK-LABEL: @unknown_device -// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[AFFINITY:.+]]: i64) -util.func public @unknown_device(%device: !hal.device, %affinity: i64) { - // CHECK: hal.command_buffer.create device(%[[DEVICE]] : !hal.device) mode("None") - %cmd = hal.command_buffer.create device(%device : !hal.device) mode("None") categories("Transfer|Dispatch") affinity(%affinity) : !hal.command_buffer - util.return -} - -// ----- - -// Tests that command buffers that are reusable don't execute inline. -// Reusable + inline is not a valid combination. - -util.global private @device = #hal.device.target<"vulkan", {legacy_sync}> : !hal.device - -// CHECK-LABEL: @command_buffer_reusable -util.func public @command_buffer_reusable(%affinity: i64) { - // CHECK: %[[DEVICE:.+]] = util.global.load @device - %device = util.global.load @device : !hal.device - // CHECK: hal.command_buffer.create device(%[[DEVICE]] : !hal.device) mode("None") - %cmd = hal.command_buffer.create device(%device : !hal.device) mode("None") categories("Transfer|Dispatch") affinity(%affinity) : !hal.command_buffer - util.return -} - -// ----- - -// Tests that one-shot command buffers are allowed to execute inline. - -util.global private @device = #hal.device.target<"vulkan", {legacy_sync}> : !hal.device - -// CHECK-LABEL: @command_buffer_oneshot -util.func public @command_buffer_oneshot(%affinity: i64) { - // CHECK: %[[DEVICE:.+]] = util.global.load @device - %device = util.global.load @device : !hal.device - // CHECK: hal.command_buffer.create device(%[[DEVICE]] : !hal.device) mode("OneShot|AllowInlineExecution") - %cmd = hal.command_buffer.create device(%device : !hal.device) mode(OneShot) categories("Transfer|Dispatch") affinity(%affinity) : !hal.command_buffer - util.return -} - -// ----- - -// Tests for a no-op if there are no devices requiring legacy mode. - -util.global private @device = #hal.device.select<[ - #hal.device.target<"local", {}>, - #hal.device.target<"vulkan", {}> -]> : !hal.device - -// CHECK-LABEL: @legacy_mode_not_required -util.func public @legacy_mode_not_required(%affinity: i64) { - // CHECK: %[[DEVICE:.+]] = util.global.load @device - %device = util.global.load @device : !hal.device - // CHECK: hal.command_buffer.create device(%[[DEVICE]] : !hal.device) mode(OneShot) - %cmd = hal.command_buffer.create device(%device : !hal.device) mode(OneShot) categories("Transfer|Dispatch") affinity(%affinity) : !hal.command_buffer - util.return -} - -// ----- - -// Tests that any device requiring legacy_sync in a set will trigger the pass. - -util.global private @device = #hal.device.select<[ - #hal.device.target<"local", {}>, - #hal.device.target<"vulkan", {legacy_sync}> -]> : !hal.device - -// CHECK-LABEL: @mixed_legacy_mode_required -util.func public @mixed_legacy_mode_required(%wait: !hal.fence, %cmd: !hal.command_buffer, %signal: !hal.fence) { - %device = util.global.load @device : !hal.device - %affinity = arith.constant 1 : i64 - // CHECK: hal.fence.await - // CHECK: hal.device.queue.execute - // CHECK: hal.fence.await - hal.device.queue.execute<%device : !hal.device> - affinity(%affinity) - wait(%wait) signal(%signal) - commands([%cmd]) - util.return -} - -// ----- - -// Tests that only devices with legacy_sync trigger the pass. - -util.global private @device_async = #hal.device.target<"local", {}> : !hal.device -util.global private @device_sync = #hal.device.target<"vulkan", {legacy_sync}> : !hal.device - -// CHECK-LABEL: @mixed_legacy_mode_scoped -util.func public @mixed_legacy_mode_scoped(%wait: !hal.fence, %cmd: !hal.command_buffer, %signal: !hal.fence) { - // CHECK-DAG: %[[DEVICE_ASYNC:.+]] = util.global.load @device_async - %device_async = util.global.load @device_async : !hal.device - // CHECK-DAG: %[[DEVICE_SYNC:.+]] = util.global.load @device_sync - %device_sync = util.global.load @device_sync : !hal.device - %affinity = arith.constant 1 : i64 - // CHECK-NOT: hal.fence.await - // CHECK: hal.device.queue.execute<%[[DEVICE_ASYNC]] - // CHECK-NOT: hal.fence.await - hal.device.queue.execute<%device_async : !hal.device> - affinity(%affinity) - wait(%wait) signal(%signal) - commands([%cmd]) - // CHECK: hal.fence.await - // CHECK: hal.device.queue.execute<%[[DEVICE_SYNC]] - // CHECK: hal.fence.await - hal.device.queue.execute<%device_sync : !hal.device> - affinity(%affinity) - wait(%wait) signal(%signal) - commands([%cmd]) - util.return -} - -// ----- - -// Tests that queued operations get the appropriate waits before/after. - -util.global private @device = #hal.device.target<"vulkan", {legacy_sync}> : !hal.device - -// CHECK-LABEL: @blocking_execute -// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[CMD:.+]]: !hal.command_buffer, %[[SIGNAL:.+]]: !hal.fence) -util.func public @blocking_execute(%wait: !hal.fence, %cmd: !hal.command_buffer, %signal: !hal.fence) { - %affinity = arith.constant 1 : i64 - // CHECK: %[[DEVICE:.+]] = util.global.load @device - %device = util.global.load @device : !hal.device - // CHECK-DAG: %[[NULL:.+]] = util.null : !hal.fence - // CHECK-DAG: hal.fence.await until([%[[WAIT]]]) - // CHECK-NEXT: hal.device.queue.execute<%[[DEVICE]] : !hal.device> - // CHECK-SAME: wait(%[[NULL]]) signal(%[[SIGNAL]]) - // CHECK-SAME: commands([%[[CMD]]]) - // CHECK-NEXT: hal.fence.await until([%[[SIGNAL]]]) - hal.device.queue.execute<%device : !hal.device> - affinity(%affinity) - wait(%wait) signal(%signal) - commands([%cmd]) - util.return -} - -// ----- - -// Tests that waits are not inserted if they already exist. - -util.global private @device = #hal.device.target<"vulkan", {legacy_sync}> : !hal.device - -// CHECK-LABEL: @blocking_execute -// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[CMD:.+]]: !hal.command_buffer, %[[SIGNAL:.+]]: !hal.fence) -util.func public @blocking_execute(%wait: !hal.fence, %cmd: !hal.command_buffer, %signal: !hal.fence) { - // CHECK: %[[DEVICE:.+]] = util.global.load @device - %device = util.global.load @device : !hal.device - // CHECK-NEXT: %[[TIMEOUT:.+]] = arith.constant 100 - %timeout = arith.constant 100 : i32 - // CHECK-NEXT: hal.fence.await until([%[[WAIT]]]) timeout_millis(%[[TIMEOUT]]) - hal.fence.await until([%wait]) timeout_millis(%timeout) : i32 - // This should not block the search: - // CHECK-NEXT: arith.constant 0 - %affinity = arith.constant 0 : i64 - // CHECK-NEXT: %[[NULL:.+]] = util.null : !hal.fence - // CHECK-NEXT: hal.device.queue.execute<%[[DEVICE]] : !hal.device> - // CHECK-SAME: wait(%[[NULL]]) signal(%[[SIGNAL]]) - // CHECK-SAME: commands([%[[CMD]]]) - hal.device.queue.execute<%device : !hal.device> - affinity(%affinity) - wait(%wait) signal(%signal) - commands([%cmd]) - // CHECK-NEXT: hal.fence.await until([%[[SIGNAL]]]) timeout_millis(%[[TIMEOUT]]) - hal.fence.await until([%signal]) timeout_millis(%timeout) : i32 - // CHECK-NEXT: util.return - util.return -} diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json index c1a60e0a859f..2ae0ae959a2d 100644 --- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json +++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_rocm_rdna3.json @@ -315,30 +315,13 @@ "onnx/node/generated/test_bernoulli_double_expanded", "onnx/node/generated/test_bernoulli_expanded", "onnx/node/generated/test_cast_FLOAT_to_BFLOAT16", - "onnx/node/generated/test_castlike_BFLOAT16_to_FLOAT", - "onnx/node/generated/test_castlike_BFLOAT16_to_FLOAT_expanded", - "onnx/node/generated/test_castlike_DOUBLE_to_FLOAT", - "onnx/node/generated/test_castlike_DOUBLE_to_FLOAT16", - "onnx/node/generated/test_castlike_DOUBLE_to_FLOAT16_expanded", - "onnx/node/generated/test_castlike_DOUBLE_to_FLOAT_expanded", - "onnx/node/generated/test_castlike_FLOAT16_to_DOUBLE", - "onnx/node/generated/test_castlike_FLOAT16_to_DOUBLE_expanded", - "onnx/node/generated/test_castlike_FLOAT16_to_FLOAT", - "onnx/node/generated/test_castlike_FLOAT16_to_FLOAT_expanded", "onnx/node/generated/test_castlike_FLOAT_to_BFLOAT16", "onnx/node/generated/test_castlike_FLOAT_to_BFLOAT16_expanded", - "onnx/node/generated/test_castlike_FLOAT_to_DOUBLE", - "onnx/node/generated/test_castlike_FLOAT_to_DOUBLE_expanded", - "onnx/node/generated/test_castlike_FLOAT_to_FLOAT16", - "onnx/node/generated/test_castlike_FLOAT_to_FLOAT16_expanded", "onnx/node/generated/test_constantofshape_float_ones", "onnx/node/generated/test_constantofshape_int_shape_zero", "onnx/node/generated/test_constantofshape_int_zeros", "onnx/node/generated/test_convtranspose_output_shape", "onnx/node/generated/test_dropout_default_mask_ratio", - "onnx/node/generated/test_eyelike_populate_off_main_diagonal", - "onnx/node/generated/test_eyelike_with_dtype", - "onnx/node/generated/test_eyelike_without_dtype", "onnx/node/generated/test_gridsample_nearest", "onnx/node/generated/test_gridsample_nearest_align_corners_0_additional_1", "onnx/node/generated/test_gridsample_nearest_align_corners_1_additional_1", @@ -352,20 +335,15 @@ "onnx/node/generated/test_qlinearmatmul_3D_int8_float32", "onnx/node/generated/test_qlinearmatmul_3D_uint8_float16", "onnx/node/generated/test_qlinearmatmul_3D_uint8_float32", - "onnx/node/generated/test_range_float_type_positive_delta", - "onnx/node/generated/test_range_int32_type_negative_delta", "onnx/node/generated/test_reduce_l1_default_axes_keepdims_example", "onnx/node/generated/test_reduce_l1_default_axes_keepdims_example_expanded", "onnx/node/generated/test_reduce_l1_default_axes_keepdims_random", "onnx/node/generated/test_reduce_l1_default_axes_keepdims_random_expanded", "onnx/node/generated/test_reduce_l2_default_axes_keepdims_example", "onnx/node/generated/test_reduce_l2_default_axes_keepdims_random", - "onnx/node/generated/test_reduce_log_sum_asc_axes", "onnx/node/generated/test_reduce_log_sum_default", - "onnx/node/generated/test_reduce_log_sum_desc_axes", "onnx/node/generated/test_reduce_log_sum_exp_default_axes_keepdims_example", "onnx/node/generated/test_reduce_log_sum_exp_default_axes_keepdims_random", - "onnx/node/generated/test_reduce_log_sum_negative_axes", "onnx/node/generated/test_reduce_max_bool_inputs", "onnx/node/generated/test_reduce_max_empty_set", "onnx/node/generated/test_reduce_mean_default_axes_keepdims_example", @@ -388,24 +366,14 @@ "onnx/node/generated/test_sce_mean_weight_ii", "onnx/node/generated/test_sce_mean_weight_ii_log_prob", "onnx/node/generated/test_sce_mean_weight_log_prob", - "onnx/node/generated/test_shape", - "onnx/node/generated/test_shape_clip_end", - "onnx/node/generated/test_shape_clip_start", "onnx/node/generated/test_shape_end_1", - "onnx/node/generated/test_shape_end_negative_1", - "onnx/node/generated/test_shape_example", - "onnx/node/generated/test_shape_start_1", "onnx/node/generated/test_shape_start_1_end_2", "onnx/node/generated/test_shape_start_1_end_negative_1", "onnx/node/generated/test_shape_start_negative_1", "onnx/node/generated/test_size", "onnx/node/generated/test_size_example", - "onnx/node/generated/test_slice_default_axes", "onnx/node/generated/test_split_zero_size_splits_opset13", "onnx/node/generated/test_split_zero_size_splits_opset18", - "onnx/node/generated/test_top_k", - "onnx/node/generated/test_top_k_negative_axis", - "onnx/node/generated/test_top_k_smallest", "onnx/node/generated/test_tril_zero", "onnx/node/generated/test_triu_zero" ]