Skip to content

Commit

Permalink
Added test
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreyPavlenko committed Sep 23, 2024
1 parent 81583d4 commit 0ebb9d7
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 42 deletions.
4 changes: 1 addition & 3 deletions include/gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,8 @@ template <unsigned N> struct OclModuleExecutorBase {
assert(argCounter == mod->functionType.getNumInputs());
}

void checkArg(void *alignedPtr, bool isUsm = true) const {
void checkArg(const void *alignedPtr, bool isUsm = true) const {
assert(!isUsm || mod->runtime.isUsm(alignedPtr));
// It's recommended to have at least 16-byte alignment
assert(reinterpret_cast<std::uintptr_t>(alignedPtr) % 16 == 0);
}
#endif
};
Expand Down
1 change: 0 additions & 1 deletion lib/gc/Transforms/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,4 @@ endforeach ()

target_include_directories(GcGpuPasses PUBLIC ${IMEX_BUILD_INCLUDES})
target_link_libraries(GcGpuPasses PUBLIC ${IMEX_LIBS})
target_link_libraries(GcPasses PUBLIC GcGpuPasses)
set_property(GLOBAL APPEND PROPERTY IMEX_LIBS ${IMEX_LIBS})
1 change: 1 addition & 0 deletions test/mlir/unittests/ExecutionEngine/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ add_mlir_unittest(GCExecutionEngineGpuTests
)
target_link_libraries(GCExecutionEngineGpuTests
PRIVATE
GcGpuPasses
GcJitWrapper
GcGpuOclRuntime
)
141 changes: 103 additions & 38 deletions test/mlir/unittests/ExecutionEngine/GPU/GpuOclRuntimeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@
#include "gtest/gtest.h"
#include <memory>

#include <mlir/Dialect/GPU/Transforms/Passes.h>

#include "gc/Transforms/Passes.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
#include <CL/cl_ext.h>
Expand All @@ -31,12 +28,12 @@ using namespace gc::gpu;

constexpr char addStatic[] = R"mlir(
module @test {
func.func @entry(%arg0: memref<32x32xf32>, %arg1: memref<32x32xf32>, %arg2: memref<32x32xf32>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<32x32xf32>
%1 = bufferization.to_tensor %arg1 restrict : memref<32x32xf32>
%2 = tensor.empty() : tensor<32x32xf32>
%3 = linalg.add ins(%1, %0 : tensor<32x32xf32>, tensor<32x32xf32>) outs(%2 : tensor<32x32xf32>) -> tensor<32x32xf32>
bufferization.materialize_in_destination %3 in restrict writable %arg2 : (tensor<32x32xf32>, memref<32x32xf32>) -> ()
func.func @entry(%arg0: memref<64x64xf32>, %arg1: memref<64x64xf32>, %arg2: memref<64x64xf32>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<64x64xf32>
%1 = bufferization.to_tensor %arg1 restrict : memref<64x64xf32>
%2 = tensor.empty() : tensor<64x64xf32>
%3 = linalg.add ins(%1, %0 : tensor<64x64xf32>, tensor<64x64xf32>) outs(%2 : tensor<64x64xf32>) -> tensor<64x64xf32>
bufferization.materialize_in_destination %3 in restrict writable %arg2 : (tensor<64x64xf32>, memref<64x64xf32>) -> ()
return
}
}
Expand All @@ -59,40 +56,69 @@ module @test {
}
)mlir";

template <unsigned N, unsigned M = N> struct TestAdd {
// MLIR test module with a statically shaped entry point
// @entry(memref<64x128xf32>, memref<128x128xf32>, memref<64x128xf32>).
// The function zero-fills a 64x128 accumulator, runs linalg.matmul_transpose_b
// on %arg0 and %arg1 into it, adds %arg0 to the product with linalg.add and
// materializes the sum into %arg2.
constexpr char matmulAddStatic[] = R"mlir(
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
func.func @entry(%arg0: memref<64x128xf32>, %arg1: memref<128x128xf32>, %arg2: memref<64x128xf32>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<64x128xf32>
%1 = bufferization.to_tensor %arg1 restrict : memref<128x128xf32>
%2 = tensor.empty() : tensor<64x128xf32>
%cst = arith.constant 0.000000e+00 : f32
%3 = linalg.fill ins(%cst : f32) outs(%2 : tensor<64x128xf32>) -> tensor<64x128xf32>
%4 = linalg.matmul_transpose_b ins(%0, %1 : tensor<64x128xf32>, tensor<128x128xf32>) outs(%3 : tensor<64x128xf32>) -> tensor<64x128xf32>
%5 = tensor.empty() : tensor<64x128xf32>
%6 = linalg.add ins(%4, %0 : tensor<64x128xf32>, tensor<64x128xf32>) outs(%5 : tensor<64x128xf32>) -> tensor<64x128xf32>
bufferization.materialize_in_destination %6 in restrict writable %arg2 : (tensor<64x128xf32>, memref<64x128xf32>) -> ()
return
}
}
)mlir";

/// State shared by the GPU runtime test fixtures: an OpenCL runtime, a command
/// queue created from it, an execution context built over both, and an MLIR
/// context used for parsing test modules.
struct TestBase {
  // Keep this declaration order: `queue` and `ctx` are initialised from the
  // members declared before them.
  OclRuntime runtime = gcGetOrReport(OclRuntime::get());
  cl_command_queue queue = gcGetOrReport(runtime.createQueue());
  OclContext ctx{runtime, queue};
  MLIRContext mlirCtx{gc::initCompilerAndGetDialects()};

  /// Hands the command queue back to the runtime.
  virtual ~TestBase() { gcGetOrReport(runtime.releaseQueue(queue)); }

  /// Runs the built module; every concrete test supplies its own launcher.
  virtual void exec(std::shared_ptr<const OclModule> &mod) = 0;

  /// Parses the MLIR source text `code` into a module using `mlirCtx`.
  OwningOpRef<ModuleOp> parse(const char *code) {
    llvm::SourceMgr sources;
    sources.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(code),
                               SMLoc());
    return parseSourceFile<ModuleOp>(sources, &mlirCtx);
  }
};

template <unsigned N, unsigned M = N> struct TestAdd : TestBase {
static constexpr unsigned size = N * M;
float *buf0 = gcGetOrReport(runtime.usmNewDev<float>(size));
float *buf1 = gcGetOrReport(runtime.usmNewDev<float>(size));
float *buf2 = gcGetOrReport(runtime.usmNewShared<float>(size));
MLIRContext mlirCtx{gc::initCompilerAndGetDialects()};
float cpuBuf1[size] = {};
float cpuBuf2[size] = {};

explicit TestAdd() { std::fill(cpuBuf1, cpuBuf1 + size, 2.0f); }
explicit TestAdd() {
float cpuBuf[size];
std::fill(cpuBuf, cpuBuf + size, 2.0f);
assert(runtime.usmCpy(ctx, cpuBuf, buf0, size));
assert(runtime.usmCpy(ctx, cpuBuf, buf1, size));
gcGetOrReport(ctx.finish());
}

virtual ~TestAdd() {
gcGetOrReport(runtime.releaseQueue(queue));
~TestAdd() override {
assert(runtime.usmFree(buf0));
assert(runtime.usmFree(buf1));
assert(runtime.usmFree(buf2));
}

virtual void exec(std::shared_ptr<const OclModule> &mod, OclContext &ctx) = 0;

void test(const char *code) {
OclContext ctx(runtime, queue);
assert(runtime.usmCpy(ctx, cpuBuf1, buf0, size));
assert(runtime.usmCpy(ctx, cpuBuf1, buf1, size));

OclModuleBuilder builder(parse(code));
auto mod = gcGetOrReport(builder.build(runtime));
exec(mod);

exec(mod, ctx);

assert(runtime.usmCpy(ctx, buf2, cpuBuf2, size));
float cpuBuf[size];
assert(runtime.usmCpy(ctx, buf2, cpuBuf, size));
gcGetOrReport(ctx.finish());

for (unsigned i = 0; i < size; i++) {
Expand All @@ -101,24 +127,51 @@ template <unsigned N, unsigned M = N> struct TestAdd {
}
// std::cout << "\n";

for (float i : cpuBuf2) {
// std::cout << cpuBuf2[i] << " ";
for (float i : cpuBuf) {
// std::cout << i << " ";
assert(i == 4.0f);
}
}
};

OwningOpRef<ModuleOp> parse(const char *code) {
std::unique_ptr<llvm::MemoryBuffer> memBuf =
llvm::MemoryBuffer::getMemBuffer(code);
llvm::SourceMgr srcMgr;
srcMgr.AddNewSourceBuffer(std::move(memBuf), SMLoc());
return parseSourceFile<ModuleOp>(srcMgr, &mlirCtx);
/// Fixture for the matmul-plus-add test: owns three USM buffers, uploads the
/// inputs in the constructor and verifies the output after the module ran.
///
/// buf0 (N*M floats, from usmNewDev) and buf1 (M*M floats, from usmNewDev) are
/// filled with 2.0f; buf2 (N*M floats, from usmNewShared) receives the result
/// and is read directly on the host. Derived classes implement exec() to
/// launch the built module.
///
/// All runtime calls are evaluated outside of assert(), so the uploads and
/// frees still happen when the test is compiled with NDEBUG.
template <unsigned N, unsigned M = N> struct TestMatmulAdd : TestBase {
  static constexpr unsigned size1 = N * M;
  static constexpr unsigned size2 = M * M;
  float *buf0 = gcGetOrReport(runtime.usmNewDev<float>(size1));
  float *buf1 = gcGetOrReport(runtime.usmNewDev<float>(size2));
  float *buf2 = gcGetOrReport(runtime.usmNewShared<float>(size1));

  explicit TestMatmulAdd() {
    // A single host staging buffer feeds both uploads, so it has to hold the
    // larger of the two element counts (size1 exceeds size2 whenever N > M).
    constexpr unsigned hostSize = size1 > size2 ? size1 : size2;
    float cpuBuf[hostSize];
    std::fill(cpuBuf, cpuBuf + hostSize, 2);

    [[maybe_unused]] const bool copied0 =
        static_cast<bool>(runtime.usmCpy(ctx, cpuBuf, buf0, size1));
    assert(copied0);
    [[maybe_unused]] const bool copied1 =
        static_cast<bool>(runtime.usmCpy(ctx, cpuBuf, buf1, size2));
    assert(copied1);

    // Wait for the copies before cpuBuf goes out of scope.
    gcGetOrReport(ctx.finish());
  }

  ~TestMatmulAdd() override {
    [[maybe_unused]] const bool freed0 =
        static_cast<bool>(runtime.usmFree(buf0));
    assert(freed0);
    [[maybe_unused]] const bool freed1 =
        static_cast<bool>(runtime.usmFree(buf1));
    assert(freed1);
    [[maybe_unused]] const bool freed2 =
        static_cast<bool>(runtime.usmFree(buf2));
    assert(freed2);
  }

  /// Builds a module from the MLIR text `code`, runs it through exec() and
  /// checks every element of buf2.
  void test(const char *code) {
    OclModuleBuilder builder(parse(code));
    auto mod = gcGetOrReport(builder.build(runtime));
    exec(mod);

    gcGetOrReport(ctx.finish());
    // NOTE(review): 514 is hard-coded; it presumably corresponds to
    // 2 * 2 * 128 + 2 for all-2.0 inputs with M == 128 - confirm before
    // instantiating this fixture with other dimensions.
    for (unsigned i = 0; i < size1; i++) {
      // std::cout << buf2[i] << " ";
      assert(buf2[i] == 514);
    }
    // std::cout << "\n";
  }
};

TEST(GpuOclRuntime, TestAddStatic) {
struct TestAddStatic1 : TestAdd<32> {
void exec(std::shared_ptr<const OclModule> &mod, OclContext &ctx) override {
struct TestAddStatic1 : TestAdd<64> {
void exec(std::shared_ptr<const OclModule> &mod) override {
assert(mod->isStatic);
StaticExecutor<3> exec(mod);
exec(ctx, buf0, buf1, buf2);
Expand All @@ -128,8 +181,8 @@ TEST(GpuOclRuntime, TestAddStatic) {
} test1;
test1.test(addStatic);

struct TestAddStatic2 : TestAdd<32> {
void exec(std::shared_ptr<const OclModule> &mod, OclContext &ctx) override {
struct TestAddStatic2 : TestAdd<64> {
void exec(std::shared_ptr<const OclModule> &mod) override {
assert(mod->isStatic);
StaticExecutor<3> exec(mod);
exec.arg(buf0);
Expand All @@ -146,7 +199,7 @@ TEST(GpuOclRuntime, TestAddStatic) {
TEST(GpuOclRuntime, TestAddDynamic) {
GTEST_SKIP() << "Dynamic shapes are not yet supported";
struct TestAddDynamic : TestAdd<32, 64> {
void exec(std::shared_ptr<const OclModule> &mod, OclContext &ctx) override {
void exec(std::shared_ptr<const OclModule> &mod) override {
assert(!mod->isStatic);
int64_t shape[] = {32, 64};
int64_t strides[] = {64, 1};
Expand All @@ -161,3 +214,15 @@ TEST(GpuOclRuntime, TestAddDynamic) {
} test;
test.test(addDynamic);
}

// Runs the statically shaped matmul+add module through a StaticExecutor,
// passing all three buffers in a single call.
TEST(GpuOclRuntime, TestMatmulAddStatic) {
  struct MatmulAddStaticCase : TestMatmulAdd<64, 128> {
    void exec(std::shared_ptr<const OclModule> &mod) override {
      assert(mod->isStatic);
      StaticExecutor<3> executor(mod);
      executor(ctx, buf0, buf1, buf2);
      assert(executor.isSmall());
    }
  };

  MatmulAddStaticCase fixture;
  fixture.test(matmulAddStatic);
}

0 comments on commit 0ebb9d7

Please sign in to comment.