diff --git a/enzyme/Enzyme/Enzyme.cpp b/enzyme/Enzyme/Enzyme.cpp index 1688f96275ed..b215452b4d27 100644 --- a/enzyme/Enzyme/Enzyme.cpp +++ b/enzyme/Enzyme/Enzyme.cpp @@ -1,3 +1,4 @@ + //===- Enzyme.cpp - Automatic Differentiation Transformation Pass -------===// // // Enzyme Project @@ -1314,6 +1315,31 @@ class EnzymeBase { return type_args; } + bool HandleTruncate(CallInst *CI) { + IRBuilder<> Builder(CI); + Function *F = parseFunctionParameter(CI); + if (!F) + return false; + if (CI->arg_size() != 3) { + EmitFailure("TooManyArgs", CI->getDebugLoc(), CI, + "Had incorrect number of args to __enzyme_truncate", *CI, + " - expected 3"); + return false; + } + auto Cfrom = cast(CI->getArgOperand(1)); + assert(Cfrom); + auto Cto = cast(CI->getArgOperand(2)); + assert(Cto); + RequestContext context(CI, &Builder); + llvm::Value* res = Logic.CreateTruncate(context, F, (unsigned)Cfrom->getValue().getZExtValue(), (unsigned)Cto->getValue().getZExtValue() ); + if (!res) + return false; + res = Builder.CreatePointerCast(res, CI->getType()); + CI->replaceAllUsesWith(res); + CI->eraseFromParent(); + return true; + } + bool HandleBatch(CallInst *CI) { unsigned width = 1; unsigned truei = 0; @@ -2028,6 +2054,7 @@ class EnzymeBase { Fn->getName().contains("__enzyme_augmentfwd") || Fn->getName().contains("__enzyme_augmentsize") || Fn->getName().contains("__enzyme_reverse") || + Fn->getName().contains("__enzyme_truncate") || Fn->getName().contains("__enzyme_batch") || Fn->getName().contains("__enzyme_trace") || Fn->getName().contains("__enzyme_condition"))) @@ -2060,6 +2087,7 @@ class EnzymeBase { MapVector toVirtual; MapVector toSize; SmallVector toBatch; + SmallVector toTruncate; MapVector toProbProg; SetVector InactiveCalls; SetVector IterCalls; @@ -2369,6 +2397,7 @@ class EnzymeBase { bool virtualCall = false; bool sizeOnly = false; bool batch = false; + bool truncate = false; bool probProg = false; DerivativeMode derivativeMode; ProbProgMode probProgMode; @@ -2398,6 +2427,9 @@ class EnzymeBase { } else if (Fn->getName().contains("__enzyme_batch")) { enableEnzyme = true; batch = true; + } else if (Fn->getName().contains("__enzyme_truncate")) { + enableEnzyme = true; + truncate = true; } else if (Fn->getName().contains("__enzyme_likelihood")) { enableEnzyme = true; probProgMode = ProbProgMode::Likelihood; @@ -2455,6 +2487,8 @@ class EnzymeBase { toSize[CI] = derivativeMode; else if (batch) toBatch.push_back(CI); + else if (truncate) + toTruncate.push_back(CI); else if (probProg) { toProbProg[CI] = probProgMode; } else @@ -2548,6 +2582,9 @@ class EnzymeBase { for (auto call : toBatch) { HandleBatch(call); } + for (auto call : toTruncate) { + HandleTruncate(call); + } for (auto &&[call, mode] : toProbProg) { HandleProbProg(call, mode, calls); diff --git a/enzyme/Enzyme/EnzymeLogic.cpp b/enzyme/Enzyme/EnzymeLogic.cpp index da98b24a6e02..45498f97a7c9 100644 --- a/enzyme/Enzyme/EnzymeLogic.cpp +++ b/enzyme/Enzyme/EnzymeLogic.cpp @@ -4808,6 +4808,485 @@ Function *EnzymeLogic::CreateForwardDiff( return nf; } +class TruncateGenerator : public llvm::InstVisitor { +private: +ValueToValueMapTy &originalToNewFn; +unsigned fromwidth; +unsigned towidth; +Function* oldFunc; +Function* newFunc; +AllocaInst* tmpBlock; +EnzymeLogic &Logic; + +public: +TruncateGenerator(ValueToValueMapTy &originalToNewFn, unsigned fromwidth, unsigned towidth, Function* oldFunc, Function* newFunc, EnzymeLogic& Logic) : + originalToNewFn(originalToNewFn), fromwidth(fromwidth), towidth(towidth), oldFunc(oldFunc), newFunc(newFunc), Logic(Logic) { + IRBuilder <> B(&newFunc->getEntryBlock().front()); + tmpBlock = B.CreateAlloca(getTypeForWidth(fromwidth)); + } + + void visitInstruction(llvm::Instruction &inst) { + using namespace llvm; + + // TODO explicitly handle all instructions rather than using the catch all + // below + + switch (inst.getOpcode()) { +//#include "InstructionDerivatives.inc" + default: + break; + } + + todo(inst); + } + + Type* getTypeForWidth(unsigned width) { + switch(width){ + default: + return llvm::Type::getIntNTy(oldFunc->getContext(), width); + case 64: + return llvm::Type::getDoubleTy(oldFunc->getContext()); + case 32: + return llvm::Type::getFloatTy(oldFunc->getContext()); + case 16: + return llvm::Type::getHalfTy(oldFunc->getContext()); + } + } + Value *truncate(IRBuilder<> &B, Value* v) { + Type* nextType = getTypeForWidth(towidth); + B.CreateStore(v, B.CreatePointerCast(tmpBlock, PointerType::getUnqual(v->getType()))); + return B.CreateLoad(nextType, B.CreatePointerCast(tmpBlock, PointerType::getUnqual(nextType))); + } + + Value *expand(IRBuilder<> &B, Value* v, Type* origT) { + auto c0 = Constant::getNullValue(llvm::Type::getIntNTy(oldFunc->getContext(), fromwidth)); + B.CreateStore(c0, B.CreatePointerCast(tmpBlock, PointerType::getUnqual(c0->getType()))); + B.CreateStore(v, B.CreatePointerCast(tmpBlock, PointerType::getUnqual(v->getType()))); + return B.CreateLoad(origT, B.CreatePointerCast(tmpBlock, PointerType::getUnqual(origT))); + } + + void todo(llvm::Instruction &I) { + std::string s; + llvm::raw_string_ostream ss(s); + ss << "cannot handle unknown instruction\n" << I; + if (CustomErrorHandler) { + IRBuilder<> Builder2(getNewFromOriginal(&I)); + CustomErrorHandler(ss.str().c_str(), wrap(&I), ErrorType::NoTruncate, + this, nullptr, wrap(&Builder2)); + return; + } else { + EmitFailure("NoTruncate", I.getDebugLoc(), &I, ss.str()); + return; + } + } + + void visitAllocaInst(llvm::AllocaInst &I) { + return; + } + void visitICmpInst(llvm::ICmpInst &I) { + return; + } + void visitFCmpInst(llvm::FCmpInst &I) { + todo(I); + return; + } + void visitLoadInst(llvm::LoadInst &LI) { + auto alignment = LI.getAlign(); + visitLoadLike(LI, alignment); + } + void visitStoreInst(llvm::StoreInst &SI) { + auto align = SI.getAlign(); + visitCommonStore(SI, SI.getPointerOperand(), SI.getValueOperand(), align, + SI.isVolatile(), SI.getOrdering(), SI.getSyncScopeID(), + /*mask=*/nullptr); + } + void visitGetElementPtrInst(llvm::GetElementPtrInst &gep) { + return; + } + void visitPHINode(llvm::PHINode &phi) { + return; + } + void visitCastInst(llvm::CastInst &phi) { + todo(phi); + return; + } + void visitSelectInst(llvm::SelectInst &SI) { + todo(SI); + return; + } + void visitExtractElementInst(llvm::ExtractElementInst &EEI) { + return; + } + void visitInsertElementInst(llvm::InsertElementInst &EEI) { + return; + } + void visitShuffleVectorInst(llvm::ShuffleVectorInst &EEI) { + return; + } + void visitExtractValueInst(llvm::ExtractValueInst &EEI) { + return; + } + void visitInsertValueInst(llvm::InsertValueInst &EEI) { + return; + } + void visitBinaryOperator(llvm::BinaryOperator &BO) { + + switch(BO.getOpcode()) { + default: break; + case BinaryOperator::Add: + case BinaryOperator::Sub: + case BinaryOperator::Mul: + case BinaryOperator::UDiv: + case BinaryOperator::SDiv: + case BinaryOperator::URem: + case BinaryOperator::SRem: + case BinaryOperator::AShr: + case BinaryOperator::LShr: + case BinaryOperator::Shl: + case BinaryOperator::And: + case BinaryOperator::Or: + case BinaryOperator::Xor: + return; + } + + if (towidth == 32 || towidth == 16 || towidth == 64) { + auto newI = getNewFromOriginal(&BO); + IRBuilder<> B(newI); + switch(BO.getOpcode()) { + default: break; + case BinaryOperator::FMul: + { + auto nres = cast(B.CreateFMul(truncate(B, getNewFromOriginal(BO.getOperand(0))), truncate(B, getNewFromOriginal(BO.getOperand(1))))); + nres->takeName(newI); + nres->copyIRFlags(newI); + newI->replaceAllUsesWith(expand(B, nres, BO.getType())); + newI->eraseFromParent(); + } + return; + case BinaryOperator::FAdd: + { + auto nres = cast(B.CreateFAdd(truncate(B, getNewFromOriginal(BO.getOperand(0))), truncate(B, getNewFromOriginal(BO.getOperand(1))))); + nres->takeName(newI); + nres->copyIRFlags(newI); + newI->replaceAllUsesWith(expand(B, nres, BO.getType())); + newI->eraseFromParent(); + } + return; + case BinaryOperator::FSub: + { + auto nres = cast(B.CreateFSub(truncate(B, getNewFromOriginal(BO.getOperand(0))), truncate(B, getNewFromOriginal(BO.getOperand(1))))); + nres->takeName(newI); + nres->copyIRFlags(newI); + newI->replaceAllUsesWith(expand(B, nres, BO.getType())); + newI->eraseFromParent(); + } + return; + case BinaryOperator::FDiv: + { + auto nres = cast(B.CreateFDiv(truncate(B, getNewFromOriginal(BO.getOperand(0))), truncate(B, getNewFromOriginal(BO.getOperand(1))))); + nres->takeName(newI); + nres->copyIRFlags(newI); + newI->replaceAllUsesWith(expand(B, nres, BO.getType())); + newI->eraseFromParent(); + } + return; + case BinaryOperator::FRem: + { + auto nres = cast(B.CreateFRem(truncate(B, getNewFromOriginal(BO.getOperand(0))), truncate(B, getNewFromOriginal(BO.getOperand(1))))); + nres->takeName(newI); + nres->copyIRFlags(newI); + newI->replaceAllUsesWith(expand(B, nres, BO.getType())); + newI->eraseFromParent(); + } + return; + } + } + todo(BO); + return; + } + void visitMemSetInst(llvm::MemSetInst &MS) { + visitMemSetCommon(MS); + } + void visitMemSetCommon(llvm::CallInst &MS) { + return; + } + void visitMemTransferInst(llvm::MemTransferInst &MTI) { + using namespace llvm; + Value *isVolatile = getNewFromOriginal(MTI.getOperand(3)); + auto srcAlign = MTI.getSourceAlign(); + auto dstAlign = MTI.getDestAlign(); + visitMemTransferCommon(MTI.getIntrinsicID(), srcAlign, dstAlign, MTI, + MTI.getOperand(0), MTI.getOperand(1), + getNewFromOriginal(MTI.getOperand(2)), + isVolatile); + } + void visitMemTransferCommon(llvm::Intrinsic::ID ID, llvm::MaybeAlign srcAlign, + llvm::MaybeAlign dstAlign, llvm::CallInst &MTI, + llvm::Value *orig_dst, llvm::Value *orig_src, + llvm::Value *new_size, llvm::Value *isVolatile) { + return; + } + void visitFenceInst(llvm::FenceInst &FI) { + return; + } + void visitIntrinsicInst(llvm::IntrinsicInst &II) { + SmallVector orig_ops(II.getNumOperands()); + for (unsigned i = 0; i < II.getNumOperands(); ++i) { + orig_ops[i] = II.getOperand(i); + } + if (handleAdjointForIntrinsic(II.getIntrinsicID(), II, orig_ops)) + return; + todo(II); + return; + } + + void visitReturnInst(llvm::ReturnInst &I) { + return; + } + + void visitBranchInst(llvm::BranchInst &I) { + return; + } + void visitSwitchInst(llvm::SwitchInst &I) { + return; + } + void visitUnreachableInst(llvm::UnreachableInst &I) { + return; + } + void visitLoadLike(llvm::Instruction &I, llvm::MaybeAlign alignment, + llvm::Value *mask = nullptr, + llvm::Value *orig_maskInit = nullptr) { + return; + } + + void visitCommonStore(llvm::Instruction &I, llvm::Value *orig_ptr, + llvm::Value *orig_val, llvm::MaybeAlign prevalign, + bool isVolatile, llvm::AtomicOrdering ordering, + llvm::SyncScope::ID syncScope, llvm::Value *mask) { + return; + } + + bool + handleAdjointForIntrinsic(llvm::Intrinsic::ID ID, llvm::Instruction &I, + llvm::SmallVectorImpl &orig_ops) { + using namespace llvm; + + + switch (ID) { + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_p: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_p: + case Intrinsic::nvvm_ldg_global_f: { + auto CI = cast(I.getOperand(1)); + visitLoadLike(I, /*Align*/ MaybeAlign(CI->getZExtValue())); + return false; + } + default: + break; + } + + if (ID == Intrinsic::masked_store) { + auto align0 = cast(I.getOperand(2))->getZExtValue(); + auto align = MaybeAlign(align0); + visitCommonStore(I, /*orig_ptr*/ I.getOperand(1), + /*orig_val*/ I.getOperand(0), align, + /*isVolatile*/ false, llvm::AtomicOrdering::NotAtomic, + SyncScope::SingleThread, + /*mask*/ getNewFromOriginal(I.getOperand(3))); + return false; + } + if (ID == Intrinsic::masked_load) { + auto align0 = cast(I.getOperand(1))->getZExtValue(); + auto align = MaybeAlign(align0); + visitLoadLike(I, align, + /*mask*/ getNewFromOriginal(I.getOperand(2)), + /*orig_maskInit*/ I.getOperand(3)); + return false; + } + + auto called = cast(&I)->getCalledFunction(); + (void)called; + switch (ID) { +//#include "IntrinsicDerivatives.inc" + default: + break; + } + + switch (ID) { + case Intrinsic::nvvm_barrier0: + case Intrinsic::nvvm_barrier0_popc: + case Intrinsic::nvvm_barrier0_and: + case Intrinsic::nvvm_barrier0_or: + case Intrinsic::nvvm_membar_cta: + case Intrinsic::nvvm_membar_gl: + case Intrinsic::nvvm_membar_sys: + case Intrinsic::amdgcn_s_barrier: + return false; + default: break; + } + return true; + } + + llvm::Value *getNewFromOriginal(llvm::Value* v) { + auto found = originalToNewFn.find(v); + assert(found != originalToNewFn.end()); + return found->second; + } + + llvm::Instruction *getNewFromOriginal(llvm::Instruction* v) { + return cast(getNewFromOriginal((llvm::Value*)v)); + } + + bool handleKnownCalls(llvm::CallInst &call, llvm::Function *called, + llvm::StringRef funcName, + llvm::CallInst *const newCall) { + return false; + } + + Value* GetShadow(RequestContext &ctx, Value* v) { + if (auto F = dyn_cast(v)) + return Logic.CreateTruncate(ctx, F, fromwidth, towidth); + llvm::errs() << " unknown get truncated func: " << *v << "\n"; + llvm_unreachable("unknown get truncated func"); + return v; + } + // Return + void visitCallInst(llvm::CallInst &call) { + using namespace llvm; + + CallInst *const newCall = cast(getNewFromOriginal(&call)); + IRBuilder<> BuilderZ(newCall); + + if (auto called = call.getCalledFunction()) + if (handleKnownCalls(call, called, getFuncNameFromCall(&call), + newCall)) + return; + + RequestContext ctx(&call, &BuilderZ); + auto val = GetShadow(ctx, getNewFromOriginal(call.getCalledOperand())); + newCall->setCalledOperand(val); + return; + } +}; + +llvm::Function *EnzymeLogic::CreateTruncate(RequestContext context, llvm::Function *totrunc, + unsigned fromwidth, unsigned towidth){ + if (fromwidth == towidth) return totrunc; + + TruncateCacheKey tup(totrunc, fromwidth, towidth); + if (TruncateCachedFunctions.find(tup) != TruncateCachedFunctions.end()) { + return TruncateCachedFunctions.find(tup)->second; + } + + FunctionType *orig_FTy = totrunc->getFunctionType(); + SmallVector params; + + for (unsigned i = 0; i < orig_FTy->getNumParams(); ++i) { + params.push_back(orig_FTy->getParamType(i)); + } + + Type *NewTy = totrunc->getReturnType(); + + FunctionType *FTy = FunctionType::get(NewTy, params, totrunc->isVarArg()); + Function *NewF = + Function::Create(FTy, totrunc->getLinkage(), + "trunc_" + std::to_string(fromwidth) + "_" + std::to_string(towidth) + totrunc->getName(), totrunc->getParent()); + + NewF->setLinkage(Function::LinkageTypes::InternalLinkage); + + TruncateCachedFunctions[tup] = NewF; + + if (totrunc->empty()) { + std::string s; + llvm::raw_string_ostream ss(s); + ss << "No truncate mode found for " + totrunc->getName() << "\n"; + llvm::Value *toshow = totrunc; + if (context.req) { + toshow = context.req; + ss << " at context: " << *context.req; + } else { + ss << *totrunc << "\n"; + } + if (CustomErrorHandler) { + CustomErrorHandler(ss.str().c_str(), wrap(toshow), + ErrorType::NoDerivative, nullptr, wrap(totrunc), + wrap(context.ip)); + return NewF; + } + if (context.req) { + EmitFailure("NoTruncate", context.req->getDebugLoc(), context.req, + ss.str()); + return NewF; + } + llvm::errs() << "mod: " << *totrunc->getParent() << "\n"; + llvm::errs() << *totrunc << "\n"; + llvm_unreachable("attempting to truncate function without definition"); + } + + if (fromwidth < towidth) { + std::string s; + llvm::raw_string_ostream ss(s); + ss << "Cannot truncate into a large width\n"; + llvm::Value *toshow = totrunc; + if (context.req) { + toshow = context.req; + ss << " at context: " << *context.req; + } else { + ss << *totrunc << "\n"; + } + if (CustomErrorHandler) { + CustomErrorHandler(ss.str().c_str(), wrap(toshow), + ErrorType::NoDerivative, nullptr, wrap(totrunc), + wrap(context.ip)); + return NewF; + } + if (context.req) { + EmitFailure("NoTruncate", context.req->getDebugLoc(), context.req, + ss.str()); + return NewF; + } + llvm::errs() << "mod: " << *totrunc->getParent() << "\n"; + llvm::errs() << *totrunc << "\n"; + llvm_unreachable("attempting to truncate function without definition"); + } + + + ValueToValueMapTy originalToNewFn; + + for (auto i = totrunc->arg_begin(), j = NewF->arg_begin(); i != totrunc->arg_end();) { + originalToNewFn[i] = j; + j->setName(i->getName()); + ++j; + ++i; + } + + SmallVector Returns; +#if LLVM_VERSION_MAJOR >= 13 + CloneFunctionInto(NewF, totrunc, originalToNewFn, + CloneFunctionChangeType::LocalChangesOnly, Returns, "", + nullptr); +#else + CloneFunctionInto(NewF, totrunc, originalToNewFn, true, Returns, "", nullptr); +#endif + + NewF->setLinkage(Function::LinkageTypes::InternalLinkage); + + TruncateGenerator handle(originalToNewFn, fromwidth, towidth, totrunc, NewF, *this); + for (auto &BB : *totrunc) + for (auto &I : BB) + handle.visit(&I); + + if (llvm::verifyFunction(*NewF, &llvm::errs())) { + llvm::errs() << *totrunc << "\n"; + llvm::errs() << *NewF << "\n"; + report_fatal_error("function failed verification (5)"); + } + + return NewF; +} + llvm::Function *EnzymeLogic::CreateBatch(RequestContext context, Function *tobatch, unsigned width, ArrayRef arg_types, diff --git a/enzyme/Enzyme/EnzymeLogic.h b/enzyme/Enzyme/EnzymeLogic.h index c7f7c4bae86e..6a585a60f12e 100644 --- a/enzyme/Enzyme/EnzymeLogic.h +++ b/enzyme/Enzyme/EnzymeLogic.h @@ -510,6 +510,11 @@ class EnzymeLogic { llvm::ArrayRef arg_types, BATCH_TYPE ret_type); + using TruncateCacheKey = std::tuple; + std::map TruncateCachedFunctions; + llvm::Function *CreateTruncate(RequestContext context, llvm::Function *tobatch, + unsigned fromwidth, unsigned towidth); + /// Create a traced version of a function /// \p context the instruction which requested this trace (or null). /// \p totrace is the function to trace diff --git a/enzyme/Enzyme/Utils.h b/enzyme/Enzyme/Utils.h index 27f590ddb2bb..ac2c9294c096 100644 --- a/enzyme/Enzyme/Utils.h +++ b/enzyme/Enzyme/Utils.h @@ -81,7 +81,8 @@ enum class ErrorType { InternalError = 5, TypeDepthExceeded = 6, MixedActivityError = 7, - IllegalReplaceFicticiousPHIs = 8 + IllegalReplaceFicticiousPHIs = 8, + NoTruncate = 9, }; extern "C" { diff --git a/enzyme/test/Enzyme/CMakeLists.txt b/enzyme/test/Enzyme/CMakeLists.txt index 0187644409f2..d88af6ddd95e 100644 --- a/enzyme/test/Enzyme/CMakeLists.txt +++ b/enzyme/test/Enzyme/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(Sparse) +add_subdirectory(Truncate) add_subdirectory(ReverseMode) add_subdirectory(ReverseModeVector) add_subdirectory(ForwardMode) diff --git a/enzyme/test/Enzyme/Truncate/CMakeLists.txt b/enzyme/test/Enzyme/Truncate/CMakeLists.txt new file mode 100644 index 000000000000..79e649ab8e4b --- /dev/null +++ b/enzyme/test/Enzyme/Truncate/CMakeLists.txt @@ -0,0 +1,12 @@ +# Run regression and unit tests +add_lit_testsuite(check-enzyme-trunc "Running enzyme truncation tests" + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${ENZYME_TEST_DEPS} + ARGS -v +) + +set_target_properties(check-enzyme-trunc PROPERTIES FOLDER "Tests") + +# add_lit_testsuites(ENZYME ${CMAKE_CURRENT_SOURCE_DIR} +# DEPENDS ${ENZYME_TEST_DEPS} +# ) diff --git a/enzyme/test/Enzyme/Truncate/simple.ll b/enzyme/test/Enzyme/Truncate/simple.ll new file mode 100644 index 000000000000..b0cea56f2292 --- /dev/null +++ b/enzyme/test/Enzyme/Truncate/simple.ll @@ -0,0 +1,43 @@ +; RUN: if [ %llvmver -lt 16 ]; then %opt < %s %loadEnzyme -enzyme -S | FileCheck %s; fi +; RUN: %opt < %s %newLoadEnzyme -passes="enzyme" -S | FileCheck %s + +define void @f(double* %x) { + %y = load double, double* %x + %m = fmul double %y, %y + store double %m, double* %x + ret void +} + +declare void (double*)* @__enzyme_truncate(...) + +define void @tester(double* %data) { +entry: + %ptr = call void (double*)* (...) @__enzyme_truncate(void (double*)* @f, i64 64, i64 32) + call void %ptr(double* %data) + ret void +} + +; CHECK: define void @tester(double* %data) +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @trunc_64_32f(double* %data) +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; CHECK: define internal void @trunc_64_32f(double* %x) +; CHECK-NEXT: %1 = alloca double, align 8 +; CHECK-NEXT: %y = load double, double* %x, align 8 +; CHECK-NEXT: store double %y, double* %1, align 8 +; CHECK-NEXT: %2 = bitcast double* %1 to float* +; CHECK-NEXT: %3 = load float, float* %2, align 4 +; CHECK-NEXT: store double %y, double* %1, align 8 +; CHECK-NEXT: %4 = bitcast double* %1 to float* +; CHECK-NEXT: %5 = load float, float* %4, align 4 +; CHECK-NEXT: %m = fmul float %5, %3 +; CHECK-NEXT: %6 = bitcast double* %1 to i64* +; CHECK-NEXT: store i64 0, i64* %6, align 4 +; CHECK-NEXT: %7 = bitcast double* %1 to float* +; CHECK-NEXT: store float %m, float* %7, align 4 +; CHECK-NEXT: %8 = load double, double* %1, align 8 +; CHECK-NEXT: store double %8, double* %x, align 8 +; CHECK-NEXT: ret void +; CHECK-NEXT: } \ No newline at end of file