Commit
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Dec 23, 2022
1 parent bd6b58d commit c015143
Showing 2 changed files with 99 additions and 89 deletions.
1 change: 0 additions & 1 deletion taichi/runtime/llvm/llvm_context.cpp
@@ -54,7 +54,6 @@ (the change removes one of two consecutive blank lines after "#endif")
#include "taichi/runtime/llvm/llvm_context_pass.h"
#endif

#ifdef _WIN32
// Travis CI seems doesn't support <filesystem>...
#include <filesystem>
187 changes: 99 additions & 88 deletions taichi/runtime/llvm/llvm_context_pass.h
This file's change is formatting-only (automated style fixes from the pre-commit hooks); the hunks below show the resulting formatted code.
@@ -1,5 +1,5 @@
#pragma once

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Function.h"
@@ -19,106 +19,117 @@ namespace lang {
using namespace llvm;
#if defined(TI_WITH_AMDGPU)
struct AMDGPUConvertAllocaInstAddressSpacePass : public FunctionPass {
  static char ID;
  AMDGPUConvertAllocaInstAddressSpacePass() : FunctionPass(ID) {
  }
  bool runOnFunction(llvm::Function &f) override {
    f.addFnAttr("target-cpu",
                "gfx" + AMDGPUContext::get_instance().get_mcpu().substr(3, 4));
    f.addFnAttr("target-features", "");
    for (auto &bb : f) {
      std::vector<AllocaInst *> alloca_inst_vec;
      for (Instruction &inst : bb) {
        AllocaInst *now_alloca = dyn_cast<AllocaInst>(&inst);
        if (!now_alloca ||
            now_alloca->getType()->getAddressSpace() != (unsigned)0) {
          continue;
        }
        alloca_inst_vec.push_back(now_alloca);
      }
      for (auto &allocainst : alloca_inst_vec) {
        auto alloca_type = allocainst->getAllocatedType();
        IRBuilder<> builder(allocainst);
        auto *new_alloca = builder.CreateAlloca(alloca_type, (unsigned)5);
        auto new_type = llvm::PointerType::get(alloca_type, (unsigned)0);
        new_alloca->setAlignment(Align(allocainst->getAlign().value()));
        auto *addrspacecast = builder.CreateAddrSpaceCast(new_alloca, new_type);
        allocainst->replaceAllUsesWith(addrspacecast);
        allocainst->eraseFromParent();
      }
    }
    return false;
  }
};

struct AMDGPUConvertFuncParamAddressSpacePass : public ModulePass {
  static char ID;
  AMDGPUConvertFuncParamAddressSpacePass() : ModulePass(ID) {
  }
  bool runOnModule(llvm::Module &M) override {
    for (auto &f : M) {
      bool is_kernel = false;
      const std::string func_name = f.getName().str();
      if (starts_with(func_name, "runtime_")) {
        f.setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
        // ref https://llvm.org/docs/AMDGPUUsage.html
        // “amdgpu-flat-work-group-size”=”min,max”
        // Specify the minimum and maximum flat work group sizes that will be
        // specified when the kernel is dispatched. Generated by the
        // amdgpu_flat_work_group_size CLANG attribute [CLANG-ATTR]. The implied
        // default value is 1,1024.
        f.addFnAttr("amdgpu-flat-work-group-size", "1, 1024");
        is_kernel = true;
      }
      if (!is_kernel && !f.isDeclaration())
        f.setLinkage(llvm::Function::PrivateLinkage);
    }
    std::vector<llvm::Function *> kernel_function;
    for (auto &f : M) {
      if (f.getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL)
        kernel_function.push_back(&f);
    }
    for (auto &f : kernel_function) {
      llvm::FunctionType *func_type = f->getFunctionType();
      std::vector<llvm::Type *> new_func_params;
      for (auto &arg : f->args()) {
        if (arg.getType()->getTypeID() == llvm::Type::PointerTyID) {
          auto new_type = llvm::PointerType::get(
              arg.getType()->getPointerElementType(), unsigned(1));
          new_func_params.push_back(new_type);
        } else {
          new_func_params.push_back(arg.getType());
        }
      }
      auto new_func_type = llvm::FunctionType::get(func_type->getReturnType(),
                                                   new_func_params, false);
      auto new_func = llvm::Function::Create(new_func_type, f->getLinkage(),
                                             f->getAddressSpace());
      new_func->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      new_func->addFnAttr("amdgpu-flat-work-group-size", "1, 1024");
      new_func->addFnAttr(
          "target-cpu",
          "gfx" + AMDGPUContext::get_instance().get_mcpu().substr(3, 4));
      new_func->setComdat(f->getComdat());
      f->getParent()->getFunctionList().insert(f->getIterator(), new_func);
      new_func->takeName(f);
      new_func->getBasicBlockList().splice(new_func->begin(),
                                           f->getBasicBlockList());
      for (llvm::Function::arg_iterator I = f->arg_begin(), E = f->arg_end(),
                                        I2 = new_func->arg_begin();
           I != E; ++I, ++I2) {
        if (I->getType()->getTypeID() == llvm::Type::PointerTyID) {
          auto &front_bb = new_func->getBasicBlockList().front();
          llvm::Instruction *addrspacecast =
              new AddrSpaceCastInst(I2, I->getType());
          front_bb.getInstList().insertAfter(front_bb.getFirstInsertionPt(),
                                             addrspacecast);
          I->replaceAllUsesWith(addrspacecast);
          I2->takeName(&*I);
        } else {
          I->replaceAllUsesWith(&*I2);
          I2->takeName(&*I);
        }
      }
      f->eraseFromParent();
    }
    return false;
  }
};

char AMDGPUConvertAllocaInstAddressSpacePass::ID = 0;
char AMDGPUConvertFuncParamAddressSpacePass::ID = 0;
#endif

} // namespace lang
} // namespace taichi
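
As an aside, here is a minimal usage sketch (not part of this commit) of how these two legacy passes could be scheduled over a module with LLVM's legacy pass managers, assuming TI_WITH_AMDGPU is defined and Taichi's AMDGPUContext singleton is initialized; the wrapper function name is hypothetical:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

#include "taichi/runtime/llvm/llvm_context_pass.h"

void run_amdgpu_address_space_passes(llvm::Module &module) {
  using namespace taichi::lang;

  // Per-function pass: move allocas into address space 5 (private memory)
  // and cast the result back to the generic address space for existing uses.
  llvm::legacy::FunctionPassManager function_passes(&module);
  function_passes.add(new AMDGPUConvertAllocaInstAddressSpacePass());
  function_passes.doInitialization();
  for (llvm::Function &func : module) {
    if (!func.isDeclaration())
      function_passes.run(func);
  }
  function_passes.doFinalization();

  // Module pass: rewrite "runtime_*" kernels so pointer parameters use
  // address space 1 (global memory) and mark them as AMDGPU kernels.
  llvm::legacy::PassManager module_passes;
  module_passes.add(new AMDGPUConvertFuncParamAddressSpacePass());
  module_passes.run(module);
}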
