From 4cede1a082f359cc7322b49bf73ee39f16c14b0b Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Wed, 13 Sep 2023 13:46:31 -0400 Subject: [PATCH] Simplify insert/extract, as required for address prop --- enzyme/Enzyme/FunctionUtils.cpp | 45 ++++ enzyme/Enzyme/GradientUtils.cpp | 6 +- enzyme/Enzyme/GradientUtils.h | 11 +- enzyme/test/Enzyme/ForwardMode/invptrint.ll | 1 - enzyme/test/Enzyme/ForwardMode/invptrint2.ll | 1 - enzyme/test/Enzyme/ForwardModeVector/fabs.ll | 4 +- .../Enzyme/ForwardModeVector/globallower.ll | 33 +-- .../Enzyme/ForwardModeVector/invertselect.ll | 2 - enzyme/test/Enzyme/ForwardModeVector/log1p.ll | 22 +- .../test/Enzyme/ForwardModeVector/ptr-eq.ll | 6 - .../test/Enzyme/ReverseMode/blas/gemm_f_c.ll | 16 +- .../Enzyme/ReverseMode/blas/gemm_f_c_lacpy.ll | 131 ++++++------ .../blas/gemm_f_c_lacpy_runtime_act.ll | 131 ++++++------ .../Enzyme/ReverseMode/blas/gemm_f_c_loop.ll | 117 ++++------ .../blas/gemm_f_c_transpose_lacpy.ll | 18 +- .../Enzyme/ReverseMode/blas/gemv_c_loop.ll | 66 +++--- .../Enzyme/ReverseMode/blas/gemv_c_loop2.ll | 76 +++---- .../ReverseMode/blas/gemv_c_loop3_matcopy.ll | 202 ++++++++---------- .../Enzyme/ReverseMode/blas/spmv_f_c_lacpy.ll | 115 +++++----- enzyme/test/Enzyme/ReverseMode/insertuw.ll | 7 +- enzyme/test/Enzyme/ReverseMode/insertuw2.ll | 9 +- .../ReverseMode/needsCacheWholeAllocation.ll | 13 +- .../Enzyme/ReverseMode/unnecessaryalloc.ll | 1 - enzyme/test/Enzyme/ReverseModeVector/mul.ll | 2 - .../test/Enzyme/ReverseModeVector/square.ll | 1 - 25 files changed, 484 insertions(+), 552 deletions(-) diff --git a/enzyme/Enzyme/FunctionUtils.cpp b/enzyme/Enzyme/FunctionUtils.cpp index be34578431f1..53a615049ed6 100644 --- a/enzyme/Enzyme/FunctionUtils.cpp +++ b/enzyme/Enzyme/FunctionUtils.cpp @@ -727,7 +727,52 @@ void PreProcessCache::AlwaysInline(Function *NewF) { } } +// Simplify all extractions to use inserted values, if possible. 
+void simplifyExtractions(Function *NewF) { + // First rewrite/remove any extractions + for (auto &BB : *NewF) { + IRBuilder<> B(&BB); + auto first = BB.begin(); + auto last = BB.empty() ? BB.end() : std::prev(BB.end()); + for (auto it = first; it != last;) { + auto inst = &*it; + // We iterate first here, since we may delete the instruction + // in the body + ++it; + if (auto E = dyn_cast(inst)) { + auto rep = GradientUtils::extractMeta(B, E->getAggregateOperand(), + E->getIndices(), E->getName(), + /*fallback*/ false); + if (rep) { + E->replaceAllUsesWith(rep); + E->eraseFromParent(); + } + } + } + } + // Now that there may be unused insertions, delete them. We keep a list of + // todo's since deleting an insertvalue may cause a different insertvalue to + // have no uses + SmallVector todo; + for (auto &BB : *NewF) { + for (auto &inst : BB) + if (auto I = dyn_cast(&inst)) { + if (I->getNumUses() == 0) + todo.push_back(I); + } + } + while (todo.size()) { + auto I = todo.pop_back_val(); + auto op = I->getAggregateOperand(); + I->eraseFromParent(); + if (auto I2 = dyn_cast(op)) + if (I2->getNumUses() == 0) + todo.push_back(I2); + } +} + void PreProcessCache::LowerAllocAddr(Function *NewF) { + simplifyExtractions(NewF); SmallVector Todo; for (auto &BB : *NewF) { for (auto &I : BB) { diff --git a/enzyme/Enzyme/GradientUtils.cpp b/enzyme/Enzyme/GradientUtils.cpp index f4e9bc082643..3483ea0ec259 100644 --- a/enzyme/Enzyme/GradientUtils.cpp +++ b/enzyme/Enzyme/GradientUtils.cpp @@ -4804,7 +4804,7 @@ Value *GradientUtils::extractMeta(IRBuilder<> &Builder, Value *Agg, Value *GradientUtils::extractMeta(IRBuilder<> &Builder, Value *Agg, ArrayRef off_init, - const Twine &name) { + const Twine &name, bool fallback) { std::vector off(off_init.begin(), off_init.end()); while (off.size() != 0) { if (auto Ins = dyn_cast(Agg)) { @@ -4843,6 +4843,10 @@ Value *GradientUtils::extractMeta(IRBuilder<> &Builder, Value *Agg, } if (off.size() == 0) return Agg; + + if (!fallback) + return 
nullptr; + if (Agg->getType()->isVectorTy() && off.size() == 1) return Builder.CreateExtractElement(Agg, off[0], name); diff --git a/enzyme/Enzyme/GradientUtils.h b/enzyme/Enzyme/GradientUtils.h index 5527eb97e2f7..ea5407e6044e 100644 --- a/enzyme/Enzyme/GradientUtils.h +++ b/enzyme/Enzyme/GradientUtils.h @@ -501,11 +501,20 @@ class GradientUtils : public CacheUtility { llvm::Type *getShadowType(llvm::Type *ty); + //! Helper routine to extract a nested element from a struct/array. This is + // a one dimensional special case of the multi-dim extractMeta below. static llvm::Value *extractMeta(llvm::IRBuilder<> &Builder, llvm::Value *Agg, unsigned off, const llvm::Twine &name = ""); + + //! Helper routine to extract a nested element from a struct/array. Unlike the + // LLVM instruction, this will attempt to re-use the inserted value, if it + // exists, rather than always creating a new instruction. If fallback is + // true (the default), it will create an instruction if it fails to find an + // appropriate existing value, otherwise it returns nullptr. 
static llvm::Value *extractMeta(llvm::IRBuilder<> &Builder, llvm::Value *Agg, llvm::ArrayRef off, - const llvm::Twine &name = ""); + const llvm::Twine &name = "", + bool fallback = true); static llvm::Value *recursiveFAdd(llvm::IRBuilder<> &B, llvm::Value *lhs, llvm::Value *rhs, diff --git a/enzyme/test/Enzyme/ForwardMode/invptrint.ll b/enzyme/test/Enzyme/ForwardMode/invptrint.ll index dc065dbcaf71..d1870ecac5c1 100644 --- a/enzyme/test/Enzyme/ForwardMode/invptrint.ll +++ b/enzyme/test/Enzyme/ForwardMode/invptrint.ll @@ -24,7 +24,6 @@ bb: ; CHECK: define internal { { double*, i64 }, double } @fwddiffez0({ double*, i64 } %const, double %act, double %"act'") ; CHECK-NEXT: bb: ; CHECK-NEXT: %"res'ipiv" = insertvalue { { double*, i64 }, double } zeroinitializer, { double*, i64 } %const, 0 -; CHECK-NEXT: %res = insertvalue { { double*, i64 }, double } undef, { double*, i64 } %const, 0 ; CHECK-NEXT: %"res2'ipiv" = insertvalue { { double*, i64 }, double } %"res'ipiv", double %"act'", 1 ; CHECK-NEXT: ret { { double*, i64 }, double } %"res2'ipiv" ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ForwardMode/invptrint2.ll b/enzyme/test/Enzyme/ForwardMode/invptrint2.ll index 69c839a588f3..e2616226af59 100644 --- a/enzyme/test/Enzyme/ForwardMode/invptrint2.ll +++ b/enzyme/test/Enzyme/ForwardMode/invptrint2.ll @@ -33,7 +33,6 @@ bb: ; CHECK-NEXT: store double 0.000000e+00, double* %3 ; CHECK-NEXT: %4 = load { double*, i64, double }, { double*, i64, double }* %0 ; CHECK-NEXT: %"res'ipiv" = insertvalue { { double*, i64, double }, double } zeroinitializer, { double*, i64, double } %4, 0 -; CHECK-NEXT: %res = insertvalue { { double*, i64, double }, double } undef, { double*, i64, double } %const, 0 ; CHECK-NEXT: %"res2'ipiv" = insertvalue { { double*, i64, double }, double } %"res'ipiv", double %"act'", 1 ; CHECK-NEXT: ret { { double*, i64, double }, double } %"res2'ipiv" ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ForwardModeVector/fabs.ll 
b/enzyme/test/Enzyme/ForwardModeVector/fabs.ll index 43c09ba661aa..d989d2a78ac0 100644 --- a/enzyme/test/Enzyme/ForwardModeVector/fabs.ll +++ b/enzyme/test/Enzyme/ForwardModeVector/fabs.ll @@ -29,9 +29,9 @@ declare double @llvm.fabs.f64(double) ; CHECK-NEXT: %[[i2:.+]] = select {{(fast )?}}i1 %[[i1]], double -1.000000e+00, double 1.000000e+00 ; CHECK-NEXT: %[[i0:.+]] = extractvalue [2 x double] %"x'", 0 ; CHECK-NEXT: %[[i3:.+]] = fmul fast double %[[i0]], %[[i2]] -; CHECK-NEXT: %[[i4:.+]] = insertvalue [2 x double] undef, double %[[i3]], 0 ; CHECK-NEXT: %[[i5:.+]] = extractvalue [2 x double] %"x'", 1 ; CHECK-NEXT: %[[i6:.+]] = fmul fast double %[[i5]], %[[i2]] +; CHECK-NEXT: %[[i4:.+]] = insertvalue [2 x double] undef, double %[[i3]], 0 ; CHECK-NEXT: %[[i7:.+]] = insertvalue [2 x double] %[[i4]], double %[[i6]], 1 ; CHECK-NEXT: ret [2 x double] %[[i7]] -; CHECK-NEXT: } \ No newline at end of file +; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ForwardModeVector/globallower.ll b/enzyme/test/Enzyme/ForwardModeVector/globallower.ll index ff236d41a6c4..98ca05866d97 100644 --- a/enzyme/test/Enzyme/ForwardModeVector/globallower.ll +++ b/enzyme/test/Enzyme/ForwardModeVector/globallower.ll @@ -31,41 +31,23 @@ entry: ; CHECK: define {{[^@]+}}@fwddiffe3mulglobal(double [[X:%.*]], [3 x double] %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %"global'ipa" = alloca double, align 8 -; CHECK-NEXT: %"global'ipa1" = alloca double, align 8 -; CHECK-NEXT: %"global'ipa2" = alloca double, align 8 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* %"global'ipa" to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 [[TMP0]], i8 0, i64 8, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %"global'ipa1" to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 [[TMP1]], i8 0, i64 8, i1 false) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* %"global'ipa2" to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 [[TMP2]], i8 0, i64 8, i1 false) 
-; CHECK-NEXT: %"global_local.0.copyload'ipl" = load double, double* %"global'ipa", align 8 -; CHECK-NEXT: %"global_local.0.copyload'ipl3" = load double, double* %"global'ipa1", align 8 -; CHECK-NEXT: %"global_local.0.copyload'ipl4" = load double, double* %"global'ipa2", align 8 ; CHECK-NEXT: [[GLOBAL_LOCAL_0_COPYLOAD:%.*]] = load double, double* @global, align 8 ; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[GLOBAL_LOCAL_0_COPYLOAD]], [[X]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double %"global_local.0.copyload'ipl", [[X]] -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast double %"global_local.0.copyload'ipl3", [[X]] -; CHECK-NEXT: [[TMP12:%.*]] = fmul fast double %"global_local.0.copyload'ipl4", [[X]] ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [3 x double] %"x'", 0 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast double [[TMP3]], [[GLOBAL_LOCAL_0_COPYLOAD]] ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue [3 x double] %"x'", 1 ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast double [[TMP7]], [[GLOBAL_LOCAL_0_COPYLOAD]] ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue [3 x double] %"x'", 2 ; CHECK-NEXT: [[TMP13:%.*]] = fmul fast double [[TMP11]], [[GLOBAL_LOCAL_0_COPYLOAD]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd fast double [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd fast double [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP14:%.*]] = fadd fast double [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[MUL2:%.*]] = fmul fast double [[MUL]], [[MUL]] -; CHECK-NEXT: [[TMP15:%.*]] = fmul fast double [[TMP6]], [[MUL]] -; CHECK-NEXT: [[TMP19:%.*]] = fmul fast double [[TMP10]], [[MUL]] -; CHECK-NEXT: [[TMP23:%.*]] = fmul fast double [[TMP14]], [[MUL]] +; CHECK-NEXT: [[TMP15:%.*]] = fmul fast double [[TMP5]], [[MUL]] +; CHECK-NEXT: [[TMP19:%.*]] = fmul fast double [[TMP9]], [[MUL]] +; CHECK-NEXT: [[TMP23:%.*]] = fmul fast double [[TMP13]], [[MUL]] -; CHECK-NEXT: [[TMP16:%.*]] = fmul fast double [[TMP6]], [[MUL]] -; CHECK-NEXT: [[TMP20:%.*]] = fmul fast double [[TMP10]], [[MUL]] -; CHECK-NEXT: [[TMP24:%.*]] = fmul fast double 
[[TMP14]], [[MUL]] +; CHECK-NEXT: [[TMP16:%.*]] = fmul fast double [[TMP5]], [[MUL]] +; CHECK-NEXT: [[TMP20:%.*]] = fmul fast double [[TMP9]], [[MUL]] +; CHECK-NEXT: [[TMP24:%.*]] = fmul fast double [[TMP13]], [[MUL]] ; CHECK-NEXT: [[TMP17:%.*]] = fadd fast double [[TMP15]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = insertvalue [3 x double] undef, double [[TMP17]], 0 @@ -73,9 +55,6 @@ entry: ; CHECK-NEXT: [[TMP22:%.*]] = insertvalue [3 x double] [[TMP18]], double [[TMP21]], 1 ; CHECK-NEXT: [[TMP25:%.*]] = fadd fast double [[TMP23]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = insertvalue [3 x double] [[TMP22]], double [[TMP25]], 2 -; CHECK-NEXT: store double [[TMP17]], double* %"global'ipa", align 8 -; CHECK-NEXT: store double [[TMP21]], double* %"global'ipa1", align 8 -; CHECK-NEXT: store double [[TMP25]], double* %"global'ipa2", align 8 ; CHECK-NEXT: store double [[MUL2]], double* @global, align 8 ; CHECK-NEXT: ret [3 x double] [[TMP26]] ; diff --git a/enzyme/test/Enzyme/ForwardModeVector/invertselect.ll b/enzyme/test/Enzyme/ForwardModeVector/invertselect.ll index d4cfa4928233..2e364c231027 100644 --- a/enzyme/test/Enzyme/ForwardModeVector/invertselect.ll +++ b/enzyme/test/Enzyme/ForwardModeVector/invertselect.ll @@ -33,11 +33,9 @@ attributes #0 = { noinline } ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue [3 x float*] %"a'", 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [3 x float*] %"b'", 0 ; CHECK-NEXT: %"a.b'ipse" = select i1 [[CMP]], float* [[TMP2]], float* [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = insertvalue [3 x float*] undef, float* %"a.b'ipse", 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [3 x float*] %"a'", 1 ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue [3 x float*] %"b'", 1 ; CHECK-NEXT: %"a.b'ipse1" = select i1 [[CMP]], float* [[TMP5]], float* [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = insertvalue [3 x float*] [[TMP4]], float* %"a.b'ipse1", 1 ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue [3 x float*] %"a'", 2 ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue [3 x float*] %"b'", 2 ; 
CHECK-NEXT: %"a.b'ipse2" = select i1 [[CMP]], float* [[TMP8]], float* [[TMP9]] diff --git a/enzyme/test/Enzyme/ForwardModeVector/log1p.ll b/enzyme/test/Enzyme/ForwardModeVector/log1p.ll index b02866dc65cb..230bfb652e22 100644 --- a/enzyme/test/Enzyme/ForwardModeVector/log1p.ll +++ b/enzyme/test/Enzyme/ForwardModeVector/log1p.ll @@ -24,15 +24,15 @@ declare double @log1p(double) ; CHECK: define internal [3 x double] @fwddiffe3tester(double %x, [3 x double] %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = fadd fast double %x, 1.000000e+00 -; CHECK-NEXT: %1 = extractvalue [3 x double] %"x'", 0 -; CHECK-NEXT: %2 = fdiv fast double %1, %0 -; CHECK-NEXT: %3 = insertvalue [3 x double] undef, double %2, 0 -; CHECK-NEXT: %4 = extractvalue [3 x double] %"x'", 1 -; CHECK-NEXT: %5 = fdiv fast double %4, %0 -; CHECK-NEXT: %6 = insertvalue [3 x double] %3, double %5, 1 -; CHECK-NEXT: %7 = extractvalue [3 x double] %"x'", 2 -; CHECK-NEXT: %8 = fdiv fast double %7, %0 -; CHECK-NEXT: %9 = insertvalue [3 x double] %6, double %8, 2 -; CHECK-NEXT: ret [3 x double] %9 +; CHECK-NEXT: %[[i0:.+]] = fadd fast double %x, 1.000000e+00 +; CHECK-NEXT: %[[i1:.+]] = extractvalue [3 x double] %"x'", 0 +; CHECK-NEXT: %[[i2:.+]] = fdiv fast double %[[i1]], %[[i0]] +; CHECK-NEXT: %[[i4:.+]] = extractvalue [3 x double] %"x'", 1 +; CHECK-NEXT: %[[i5:.+]] = fdiv fast double %[[i4]], %[[i0]] +; CHECK-NEXT: %[[i7:.+]] = extractvalue [3 x double] %"x'", 2 +; CHECK-NEXT: %[[i8:.+]] = fdiv fast double %[[i7]], %[[i0]] +; CHECK-NEXT: %[[i3:.+]] = insertvalue [3 x double] undef, double %[[i2]], 0 +; CHECK-NEXT: %[[i6:.+]] = insertvalue [3 x double] %[[i3]], double %[[i5]], 1 +; CHECK-NEXT: %[[i9:.+]] = insertvalue [3 x double] %[[i6]], double %[[i8]], 2 +; CHECK-NEXT: ret [3 x double] %[[i9]] ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ForwardModeVector/ptr-eq.ll b/enzyme/test/Enzyme/ForwardModeVector/ptr-eq.ll index 9318b6a9bff1..4755965b1010 100644 --- a/enzyme/test/Enzyme/ForwardModeVector/ptr-eq.ll +++ 
b/enzyme/test/Enzyme/ForwardModeVector/ptr-eq.ll @@ -24,13 +24,10 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = extractvalue [3 x double*] %"x'", 0 ; CHECK-NEXT: %"val'ipl" = load double, double* [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [3 x double] undef, double %"val'ipl", 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue [3 x double*] %"x'", 1 ; CHECK-NEXT: %"val'ipl1" = load double, double* [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [3 x double] [[TMP1]], double %"val'ipl1", 1 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [3 x double*] %"x'", 2 ; CHECK-NEXT: %"val'ipl2" = load double, double* [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = insertvalue [3 x double] [[TMP3]], double %"val'ipl2", 2 ; CHECK-NEXT: [[VAL:%.*]] = load double, double* [[X]] ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue [3 x double*] %"y'", 0 ; CHECK-NEXT: store double %"val'ipl", double* [[TMP6]] @@ -41,13 +38,10 @@ entry: ; CHECK-NEXT: store double [[VAL]], double* [[Y]] ; CHECK-NEXT: [[TMP12:%.*]] = extractvalue [3 x double*] %"x'", 0 ; CHECK-NEXT: %"ptr'ipc" = bitcast double* [[TMP12]] to i8* -; CHECK-NEXT: [[TMP13:%.*]] = insertvalue [3 x i8*] undef, i8* %"ptr'ipc", 0 ; CHECK-NEXT: [[TMP14:%.*]] = extractvalue [3 x double*] %"x'", 1 ; CHECK-NEXT: %"ptr'ipc3" = bitcast double* [[TMP14]] to i8* -; CHECK-NEXT: [[TMP15:%.*]] = insertvalue [3 x i8*] [[TMP13]], i8* %"ptr'ipc3", 1 ; CHECK-NEXT: [[TMP16:%.*]] = extractvalue [3 x double*] %"x'", 2 ; CHECK-NEXT: %"ptr'ipc4" = bitcast double* [[TMP16]] to i8* -; CHECK-NEXT: [[TMP17:%.*]] = insertvalue [3 x i8*] [[TMP15]], i8* %"ptr'ipc4", 2 ; CHECK-NEXT: [[PTR:%.*]] = bitcast double* [[X]] to i8* ; CHECK-NEXT: call void @free(i8* [[PTR]]) ; CHECK-NEXT: [[TMPZ4:%.*]] = icmp ne i8* [[PTR]], %"ptr'ipc" diff --git a/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c.ll b/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c.ll index bfec0ac14225..5a3d1793dbbe 100644 --- a/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c.ll +++ 
b/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c.ll @@ -176,8 +176,6 @@ entry: ; CHECK-NEXT: br i1 %39, label %[[enzyme_memcpy_double_mat_64_exit21]], label %[[init_idx]] ; CHECK: [[enzyme_memcpy_double_mat_64_exit21]]: ; preds = %__enzyme_memcpy_double_mat_64.exit, %[[init_end_i18]] -; CHECK-NEXT: %40 = insertvalue { double*, double* } undef, double* %cache.A, 0 -; CHECK-NEXT: %41 = insertvalue { double*, double* } %40, double* %cache.B, 1 ; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %transb, i8* %m_p, i8* %n_p, i8* %k_p, i8* %alpha_p, i8* %A, i8* %lda_p, i8* %B, i8* %ldb_p, i8* %beta_p, i8* %C, i8* %ldc_p) ; CHECK-NEXT: %"ptr'ipc" = bitcast i8* %"A'" to double* ; CHECK-NEXT: %ptr = bitcast i8* %A to double* @@ -186,10 +184,8 @@ entry: ; CHECK: invertentry: ; preds = %[[enzyme_memcpy_double_mat_64_exit21]] ; CHECK-NEXT: store double 0.000000e+00, double* %"ptr'ipc", align 8, !alias.scope !13, !noalias !10 -; CHECK-NEXT: %tape.ext.A = extractvalue { double*, double* } %41, 0 -; CHECK-NEXT: %42 = bitcast double* %tape.ext.A to i8* -; CHECK-NEXT: %tape.ext.B = extractvalue { double*, double* } %41, 1 -; CHECK-NEXT: %43 = bitcast double* %tape.ext.B to i8* +; CHECK-NEXT: %[[i42:.+]] = bitcast double* %cache.A to i8* +; CHECK-NEXT: %[[i43:.+]] = bitcast double* %cache.B to i8* ; CHECK-NEXT: %ld.transa = load i8, i8* %transa ; CHECK-DAG: %[[r0:.+]] = icmp eq i8 %ld.transa, 110 ; CHECK-DAG: %[[r1:.+]] = select i1 %[[r0]], i8 116, i8 0 @@ -217,13 +213,13 @@ entry: ; CHECK-DAG: %[[r19:.+]] = icmp eq i8 %loaded.trans4, 110 ; CHECK-DAG: %[[r20:.+]] = or i1 %[[r19]], %[[r18]] ; CHECK-DAG: %[[r21:.+]] = select i1 %[[r20]], i8* %k_p, i8* %n_p -; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %byref.transpose.transb, i8* %m_p, i8* %k_p, i8* %n_p, i8* %alpha_p, i8* %"C'", i8* %ldc_p, i8* %43, i8* %[[r21]], i8* %beta_p, i8* %"A'", i8* %lda_p) +; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %byref.transpose.transb, i8* %m_p, i8* %k_p, i8* %n_p, i8* %alpha_p, i8* %"C'", 
i8* %ldc_p, i8* %[[i43]], i8* %[[r21]], i8* %beta_p, i8* %"A'", i8* %lda_p) ; CHECK-NEXT: %loaded.trans5 = load i8, i8* %transa ; CHECK-DAG: %[[r22:.+]] = icmp eq i8 %loaded.trans5, 78 ; CHECK-DAG: %[[r23:.+]] = icmp eq i8 %loaded.trans5, 110 ; CHECK-DAG: %[[r24:.+]] = or i1 %[[r23]], %[[r22]] ; CHECK-DAG: %[[r25:.+]] = select i1 %[[r24]], i8* %m_p, i8* %k_p -; CHECK-NEXT: call void @dgemm_64_(i8* %byref.transpose.transa, i8* %transb, i8* %k_p, i8* %n_p, i8* %m_p, i8* %alpha_p, i8* %42, i8* %[[r25]], i8* %"C'", i8* %ldc_p, i8* %beta_p, i8* %"B'", i8* %ldb_p) +; CHECK-NEXT: call void @dgemm_64_(i8* %byref.transpose.transa, i8* %transb, i8* %k_p, i8* %n_p, i8* %m_p, i8* %alpha_p, i8* %[[i42]], i8* %[[r25]], i8* %"C'", i8* %ldc_p, i8* %beta_p, i8* %"B'", i8* %ldb_p) ; CHECK-NEXT: store i8 71, i8* %byref.constant.char.G ; CHECK-NEXT: store i64 0, i64* %byref.constant.int.0 ; CHECK-NEXT: %intcast.constant.int.0 = bitcast i64* %byref.constant.int.0 to i8* @@ -234,9 +230,9 @@ entry: ; CHECK-NEXT: store i64 0, i64* %[[byrefconstantint5]] ; CHECK-NEXT: %[[intcast09:.+]] = bitcast i64* %[[byrefconstantint5]] to i8* ; CHECK-NEXT: call void @dlascl_64_(i8* %byref.constant.char.G, i8* %intcast.constant.int.0, i8* %[[intcast07]], i8* %fpcast.constant.fp.1.0, i8* %beta_p, i8* %m_p, i8* %n_p, i8* %"C'", i8* %ldc_p, i8* %[[intcast09]]) -; CHECK-NEXT: %[[ret1:.+]] = bitcast double* %tape.ext.A to i8* +; CHECK-NEXT: %[[ret1:.+]] = bitcast double* %cache.A to i8* ; CHECK-NEXT: tail call void @free(i8* nonnull %[[ret1]]) -; CHECK-NEXT: %[[ret2:.+]] = bitcast double* %tape.ext.B to i8* +; CHECK-NEXT: %[[ret2:.+]] = bitcast double* %cache.B to i8* ; CHECK-NEXT: tail call void @free(i8* nonnull %[[ret2]]) ; CHECK-NEXT: ret void ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_lacpy.ll b/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_lacpy.ll index a5a0d0404cf5..9b09574ce220 100644 --- a/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_lacpy.ll +++ 
b/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_lacpy.ll @@ -108,17 +108,15 @@ entry: ; CHECK-NEXT: %cache.C = bitcast i8* %malloccall2 to double* ; CHECK-NEXT: store i8 0, i8* %byref.copy.garbage3 ; CHECK-NEXT: call void @dlacpy_64_(i8* %byref.copy.garbage3, i8* %m_p, i8* %n_p, i8* %C, i8* %ldc_p, double* %cache.C, i8* %m_p) -; CHECK-NEXT: %15 = insertvalue { double*, double* } undef, double* %cache.A, 0 -; CHECK-NEXT: %[[i16:.+]] = insertvalue { double*, double* } %15, double* %cache.C, 1 -; CHECK-NEXT: %17 = bitcast i8* %m_p to i64* -; CHECK-NEXT: %18 = load i64, i64* %17 -; CHECK-NEXT: %19 = bitcast i8* %n_p to i64* -; CHECK-NEXT: %20 = load i64, i64* %19 -; CHECK-NEXT: %size_AB = mul nuw i64 %18, %20 +; CHECK-NEXT: %[[i17:.+]] = bitcast i8* %m_p to i64* +; CHECK-NEXT: %[[i18:.+]] = load i64, i64* %[[i17]] +; CHECK-NEXT: %[[i19:.+]] = bitcast i8* %n_p to i64* +; CHECK-NEXT: %[[i20:.+]] = load i64, i64* %[[i19]] +; CHECK-NEXT: %size_AB = mul nuw i64 %[[i18]], %[[i20]] ; CHECK-NEXT: %mallocsize5 = mul nuw nsw i64 %size_AB, 8 ; CHECK-NEXT: %malloccall6 = tail call noalias nonnull i8* @malloc(i64 %mallocsize5) ; CHECK-NEXT: %mat_AB = bitcast i8* %malloccall6 to double* -; CHECK-NEXT: %21 = bitcast double* %mat_AB to i8* +; CHECK-NEXT: %[[i21:.+]] = bitcast double* %mat_AB to i8* ; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %transb, i8* %m_p, i8* %n_p, i8* %k_p, i8* %alpha, i8* %A, i8* %lda_p, i8* %B, i8* %ldb_p, i8* %beta, i8* %C, i8* %ldc_p) ; CHECK-NEXT: %"ptr'ipc" = bitcast i8* %"A'" to double* ; CHECK-NEXT: %ptr = bitcast i8* %A to double* @@ -127,12 +125,9 @@ entry: ; CHECK: invertentry: ; preds = %entry ; CHECK-NEXT: store double 0.000000e+00, double* %"ptr'ipc", align 8, !alias.scope !3, !noalias !0 -; CHECK-NEXT: %tape.ext.A = extractvalue { double*, double* } %[[i16]], 0 -; CHECK-NEXT: %[[matA:.+]] = bitcast double* %tape.ext.A to i8* -; CHECK-NEXT: %tape.ext.C = extractvalue { double*, double* } %[[i16]], 1 -; CHECK-NEXT: %[[matC0:.+]] = bitcast 
double* %tape.ext.C to i8* -; CHECK-NEXT: %tape.ext.C4 = extractvalue { double*, double* } %[[i16]], 1 -; CHECK-NEXT: %[[matC:.+]] = bitcast double* %tape.ext.C4 to i8* +; CHECK-NEXT: %[[matA:.+]] = bitcast double* %cache.A to i8* +; CHECK-NEXT: %[[matC0:.+]] = bitcast double* %cache.C to i8* +; CHECK-NEXT: %[[matC:.+]] = bitcast double* %cache.C to i8* ; CHECK-NEXT: %ld.transa = load i8, i8* %transa ; CHECK-DAG: %[[i25:.+]] = icmp eq i8 %ld.transa, 110 ; CHECK-DAG: %[[i26:.+]] = select i1 %[[i25]], i8 116, i8 0 @@ -161,12 +156,12 @@ entry: ; CHECK-DAG: %[[i41:.+]] = icmp eq i8 %loaded.trans7, 78 ; CHECK-DAG: %[[i42:.+]] = icmp eq i8 %loaded.trans7, 110 ; CHECK-NEXT: %[[i43:.+]] = or i1 %[[i42]], %[[i41]] -; CHECK-NEXT: %[[i44:.+]] = select i1 %43, i8* %m_p, i8* %k_p +; CHECK-NEXT: %[[i44:.+]] = select i1 %[[i43]], i8* %m_p, i8* %k_p ; CHECK-NEXT: store double 0.000000e+00, double* %byref.constant.fp.0.0 ; CHECK-NEXT: %fpcast.constant.fp.0.0 = bitcast double* %byref.constant.fp.0.0 to i8* -; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %transb, i8* %m_p, i8* %n_p, i8* %k_p, i8* %fpcast.constant.fp.1.0, i8* %22, i8* %[[i44]], i8* %B, i8* %ldb_p, i8* %fpcast.constant.fp.0.0, i8* %21, i8* %m_p) -; CHECK: %45 = bitcast i64* %byref.constant.one.i to i8* -; CHECK: %46 = bitcast i64* %byref.mat.size.i to i8* +; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %transb, i8* %m_p, i8* %n_p, i8* %k_p, i8* %fpcast.constant.fp.1.0, i8* %[[matA]], i8* %[[i44]], i8* %B, i8* %ldb_p, i8* %fpcast.constant.fp.0.0, i8* %[[i21]], i8* %m_p) +; CHECK: %[[i45:.+]] = bitcast i64* %byref.constant.one.i to i8* +; CHECK: %[[i46:.+]] = bitcast i64* %byref.mat.size.i to i8* ; CHECK: store i64 1, i64* %byref.constant.one.i ; CHECK-NEXT: %intcast.constant.one.i = bitcast i64* %byref.constant.one.i to i8* ; CHECK-DAG: %[[i47:.+]] = load i64, i64* %m @@ -184,88 +179,88 @@ entry: ; CHECK-NEXT: br i1 %[[i52]], label %fast.path.i, label %for.body.i ; CHECK: fast.path.i: ; preds = %init.idx.i -; 
CHECK-NEXT: %[[i53:.+]] = call fast double @ddot_64_(i8* %intcast.mat.size.i, i8* %"C'", i8* %intcast.constant.one.i, i8* %21, i8* %intcast.constant.one.i) +; CHECK-NEXT: %[[i53:.+]] = call fast double @ddot_64_(i8* %intcast.mat.size.i, i8* %"C'", i8* %intcast.constant.one.i, i8* %[[i21]], i8* %intcast.constant.one.i) ; CHECK-NEXT: br label %__enzyme_inner_prodd_64_.exit ; CHECK: for.body.i: ; preds = %for.body.i, %init.idx.i ; CHECK-NEXT: %Aidx.i = phi i64 [ 0, %init.idx.i ], [ %Aidx.next.i, %for.body.i ] ; CHECK-NEXT: %Bidx.i = phi i64 [ 0, %init.idx.i ], [ %Bidx.next.i, %for.body.i ] ; CHECK-NEXT: %iteration.i = phi i64 [ 0, %init.idx.i ], [ %iter.next.i, %for.body.i ] -; CHECK-NEXT: %sum.i = phi{{( fast)?}} double [ 0.000000e+00, %init.idx.i ], [ %57, %for.body.i ] -; CHECK-NEXT: %A.i.i = getelementptr inbounds double, double* %51, i64 %Aidx.i +; CHECK-NEXT: %sum.i = phi{{( fast)?}} double [ 0.000000e+00, %init.idx.i ], [ %[[i57:.+]], %for.body.i ] +; CHECK-NEXT: %A.i.i = getelementptr inbounds double, double* %[[i51]], i64 %Aidx.i ; CHECK-NEXT: %B.i.i = getelementptr inbounds double, double* %mat_AB, i64 %Bidx.i -; CHECK-NEXT: %54 = bitcast double* %A.i.i to i8* -; CHECK-NEXT: %55 = bitcast double* %B.i.i to i8* -; CHECK-NEXT: %56 = call fast double @ddot_64_(i8* %m_p, i8* %54, i8* %intcast.constant.one.i, i8* %55, i8* %intcast.constant.one.i) -; CHECK-NEXT: %Aidx.next.i = add nuw i64 %Aidx.i, %50 -; CHECK-NEXT: %Bidx.next.i = add nuw i64 %Aidx.i, %47 +; CHECK-NEXT: %[[i54:.+]] = bitcast double* %A.i.i to i8* +; CHECK-NEXT: %[[i55:.+]] = bitcast double* %B.i.i to i8* +; CHECK-NEXT: %[[i56:.+]] = call fast double @ddot_64_(i8* %m_p, i8* %[[i54]], i8* %intcast.constant.one.i, i8* %[[i55]], i8* %intcast.constant.one.i) +; CHECK-NEXT: %Aidx.next.i = add nuw i64 %Aidx.i, %[[i50]] +; CHECK-NEXT: %Bidx.next.i = add nuw i64 %Aidx.i, %[[i47]] ; CHECK-NEXT: %iter.next.i = add i64 %iteration.i, 1 -; CHECK-NEXT: %57 = fadd fast double %sum.i, %56 -; CHECK-NEXT: %58 = icmp 
eq i64 %iteration.i, %48 -; CHECK-NEXT: br i1 %58, label %__enzyme_inner_prodd_64_.exit, label %for.body.i +; CHECK-NEXT: %[[i57]] = fadd fast double %sum.i, %[[i56]] +; CHECK-NEXT: %[[i58:.+]] = icmp eq i64 %iteration.i, %[[i48]] +; CHECK-NEXT: br i1 %[[i58]], label %__enzyme_inner_prodd_64_.exit, label %for.body.i ; CHECK: __enzyme_inner_prodd_64_.exit: ; preds = %invertentry, %fast.path.i, %for.body.i ; CHECK-NEXT: %res.i = phi double [ 0.000000e+00, %invertentry ], [ %sum.i, %for.body.i ], [ %[[i53]], %fast.path.i ] -; CHECK-NEXT: %59 = bitcast i64* %byref.constant.one.i to i8* -; CHECK: %60 = bitcast i64* %byref.mat.size.i to i8* -; CHECK: %61 = bitcast i8* %"alpha'" to double* -; CHECK-NEXT: %62 = load double, double* %61 -; CHECK-NEXT: %63 = fadd fast double %62, %res.i -; CHECK-NEXT: store double %63, double* %61 +; CHECK-NEXT: %[[i59:.+]] = bitcast i64* %byref.constant.one.i to i8* +; CHECK: %[[i60:.+]] = bitcast i64* %byref.mat.size.i to i8* +; CHECK: %[[i61:.+]] = bitcast i8* %"alpha'" to double* +; CHECK-NEXT: %[[i62:.+]] = load double, double* %[[i61]] +; CHECK-NEXT: %[[i63:.+]] = fadd fast double %[[i62]], %res.i +; CHECK-NEXT: store double %[[i63]], double* %[[i61]] ; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %byref.transpose.transb, i8* %m_p, i8* %k_p, i8* %n_p, i8* %alpha, i8* %"C'", i8* %ldc_p, i8* %B, i8* %ldb_p, i8* %beta, i8* %"A'", i8* %lda_p) ; CHECK-NEXT: %loaded.trans8 = load i8, i8* %transa ; CHECK-DAG: %[[i64:.+]] = icmp eq i8 %loaded.trans8, 78 ; CHECK-DAG: %[[i65:.+]] = icmp eq i8 %loaded.trans8, 110 ; CHECK-DAG: %[[i66:.+]] = or i1 %[[i65]], %[[i64]] ; CHECK-NEXT: %[[i67:.+]] = select i1 %[[i66]], i8* %m_p, i8* %k_p -; CHECK-NEXT: call void @dgemm_64_(i8* %byref.transpose.transa, i8* %transb, i8* %k_p, i8* %n_p, i8* %m_p, i8* %alpha, i8* %22, i8* %[[i67]], i8* %"C'", i8* %ldc_p, i8* %beta, i8* %"B'", i8* %ldb_p) -; CHECK: %68 = bitcast i64* %byref.constant.one.i15 to i8* -; CHECK: %69 = bitcast i64* %byref.mat.size.i18 to i8* 
+; CHECK-NEXT: call void @dgemm_64_(i8* %byref.transpose.transa, i8* %transb, i8* %k_p, i8* %n_p, i8* %m_p, i8* %alpha, i8* %[[matA]], i8* %[[i67]], i8* %"C'", i8* %ldc_p, i8* %beta, i8* %"B'", i8* %ldb_p) +; CHECK: %[[i68:.+]] = bitcast i64* %byref.constant.one.i15 to i8* +; CHECK: %[[i69:.+]] = bitcast i64* %byref.mat.size.i18 to i8* ; CHECK: store i64 1, i64* %byref.constant.one.i15 ; CHECK-NEXT: %intcast.constant.one.i16 = bitcast i64* %byref.constant.one.i15 to i8* -; CHECK-NEXT: %70 = load i64, i64* %m -; CHECK-NEXT: %71 = load i64, i64* %n -; CHECK-NEXT: %mat.size.i17 = mul nuw i64 %70, %71 +; CHECK-NEXT: %[[i70:.+]] = load i64, i64* %m +; CHECK-NEXT: %[[i71:.+]] = load i64, i64* %n +; CHECK-NEXT: %mat.size.i17 = mul nuw i64 %[[i70]], %[[i71]] ; CHECK-NEXT: store i64 %mat.size.i17, i64* %byref.mat.size.i18 ; CHECK-NEXT: %intcast.mat.size.i19 = bitcast i64* %byref.mat.size.i18 to i8* -; CHECK-NEXT: %72 = icmp eq i64 %mat.size.i17, 0 -; CHECK-NEXT: br i1 %72, label %__enzyme_inner_prodd_64_.exit33, label %init.idx.i20 +; CHECK-NEXT: %[[i72:.+]] = icmp eq i64 %mat.size.i17, 0 +; CHECK-NEXT: br i1 %[[i72]], label %__enzyme_inner_prodd_64_.exit33, label %init.idx.i20 ; CHECK: init.idx.i20: ; preds = %__enzyme_inner_prodd_64_.exit -; CHECK-NEXT: %73 = load i64, i64* %ldc -; CHECK-NEXT: %74 = bitcast i8* %"C'" to double* -; CHECK-NEXT: %75 = icmp eq i64 %70, %73 -; CHECK-NEXT: br i1 %75, label %fast.path.i21, label %for.body.i31 +; CHECK-NEXT: %[[i73:.+]] = load i64, i64* %ldc +; CHECK-NEXT: %[[i74:.+]] = bitcast i8* %"C'" to double* +; CHECK-NEXT: %[[i75:.+]] = icmp eq i64 %[[i70]], %[[i73]] +; CHECK-NEXT: br i1 %[[i75]], label %fast.path.i21, label %for.body.i31 ; CHECK: fast.path.i21: ; preds = %init.idx.i20 -; CHECK-NEXT: %76 = call fast double @ddot_64_(i8* %intcast.mat.size.i19, i8* %"C'", i8* %intcast.constant.one.i16, i8* %23, i8* %intcast.constant.one.i16) +; CHECK-NEXT: %[[i76:.+]] = call fast double @ddot_64_(i8* %intcast.mat.size.i19, i8* %"C'", i8* 
%intcast.constant.one.i16, i8* %[[matC0]], i8* %intcast.constant.one.i16) ; CHECK-NEXT: br label %__enzyme_inner_prodd_64_.exit33 ; CHECK: for.body.i31: ; preds = %for.body.i31, %init.idx.i20 ; CHECK-NEXT: %Aidx.i22 = phi i64 [ 0, %init.idx.i20 ], [ %Aidx.next.i28, %for.body.i31 ] ; CHECK-NEXT: %Bidx.i23 = phi i64 [ 0, %init.idx.i20 ], [ %Bidx.next.i29, %for.body.i31 ] ; CHECK-NEXT: %iteration.i24 = phi i64 [ 0, %init.idx.i20 ], [ %iter.next.i30, %for.body.i31 ] -; CHECK-NEXT: %sum.i25 = phi{{( fast)?}} double [ 0.000000e+00, %init.idx.i20 ], [ %80, %for.body.i31 ] -; CHECK-NEXT: %A.i.i26 = getelementptr inbounds double, double* %74, i64 %Aidx.i22 -; CHECK-NEXT: %B.i.i27 = getelementptr inbounds double, double* %tape.ext.C, i64 %Bidx.i23 -; CHECK-NEXT: %77 = bitcast double* %A.i.i26 to i8* -; CHECK-NEXT: %78 = bitcast double* %B.i.i27 to i8* -; CHECK-NEXT: %79 = call fast double @ddot_64_(i8* %m_p, i8* %77, i8* %intcast.constant.one.i16, i8* %78, i8* %intcast.constant.one.i16) -; CHECK-NEXT: %Aidx.next.i28 = add nuw i64 %Aidx.i22, %73 -; CHECK-NEXT: %Bidx.next.i29 = add nuw i64 %Aidx.i22, %70 +; CHECK-NEXT: %sum.i25 = phi{{( fast)?}} double [ 0.000000e+00, %init.idx.i20 ], [ %[[i80:.+]], %for.body.i31 ] +; CHECK-NEXT: %A.i.i26 = getelementptr inbounds double, double* %[[i74]], i64 %Aidx.i22 +; CHECK-NEXT: %B.i.i27 = getelementptr inbounds double, double* %cache.C, i64 %Bidx.i23 +; CHECK-NEXT: %[[i77:.+]] = bitcast double* %A.i.i26 to i8* +; CHECK-NEXT: %[[i78:.+]] = bitcast double* %B.i.i27 to i8* +; CHECK-NEXT: %[[i79:.+]] = call fast double @ddot_64_(i8* %m_p, i8* %[[i77]], i8* %intcast.constant.one.i16, i8* %[[i78]], i8* %intcast.constant.one.i16) +; CHECK-NEXT: %Aidx.next.i28 = add nuw i64 %Aidx.i22, %[[i73]] +; CHECK-NEXT: %Bidx.next.i29 = add nuw i64 %Aidx.i22, %[[i70]] ; CHECK-NEXT: %iter.next.i30 = add i64 %iteration.i24, 1 -; CHECK-NEXT: %80 = fadd fast double %sum.i25, %79 -; CHECK-NEXT: %81 = icmp eq i64 %iteration.i24, %71 -; CHECK-NEXT: br i1 %81, 
label %__enzyme_inner_prodd_64_.exit33, label %for.body.i31 +; CHECK-NEXT: %[[i80]] = fadd fast double %sum.i25, %[[i79]] +; CHECK-NEXT: %[[i81:.+]] = icmp eq i64 %iteration.i24, %[[i71]] +; CHECK-NEXT: br i1 %[[i81]], label %__enzyme_inner_prodd_64_.exit33, label %for.body.i31 ; CHECK: __enzyme_inner_prodd_64_.exit33: ; preds = %__enzyme_inner_prodd_64_.exit, %fast.path.i21, %for.body.i31 -; CHECK-NEXT: %res.i32 = phi double [ 0.000000e+00, %__enzyme_inner_prodd_64_.exit ], [ %sum.i25, %for.body.i31 ], [ %76, %fast.path.i21 ] -; CHECK-NEXT: %82 = bitcast i64* %byref.constant.one.i15 to i8* -; CHECK: %83 = bitcast i64* %byref.mat.size.i18 to i8* -; CHECK: %84 = bitcast i8* %"beta'" to double* -; CHECK-NEXT: %85 = load double, double* %84 -; CHECK-NEXT: %86 = fadd fast double %85, %res.i32 -; CHECK-NEXT: store double %86, double* %84 +; CHECK-NEXT: %res.i32 = phi double [ 0.000000e+00, %__enzyme_inner_prodd_64_.exit ], [ %sum.i25, %for.body.i31 ], [ %[[i76]], %fast.path.i21 ] +; CHECK-NEXT: %[[i82:.+]] = bitcast i64* %byref.constant.one.i15 to i8* +; CHECK: %[[i83:.+]] = bitcast i64* %byref.mat.size.i18 to i8* +; CHECK: %[[i84:.+]] = bitcast i8* %"beta'" to double* +; CHECK-NEXT: %[[i85:.+]] = load double, double* %[[i84]] +; CHECK-NEXT: %[[i86:.+]] = fadd fast double %[[i85]], %res.i32 +; CHECK-NEXT: store double %[[i86]], double* %[[i84]] ; CHECK-NEXT: store i8 71, i8* %byref.constant.char.G ; CHECK-NEXT: store i64 0, i64* %byref.constant.int.0 ; CHECK-NEXT: %intcast.constant.int.0 = bitcast i64* %byref.constant.int.0 to i8* @@ -276,9 +271,9 @@ entry: ; CHECK-NEXT: store i64 0, i64* %byref.constant.int.013 ; CHECK-NEXT: %intcast.constant.int.014 = bitcast i64* %byref.constant.int.013 to i8* ; CHECK-NEXT: call void @dlascl_64_(i8* %byref.constant.char.G, i8* %intcast.constant.int.0, i8* %intcast.constant.int.010, i8* %fpcast.constant.fp.1.012, i8* %beta, i8* %m_p, i8* %n_p, i8* %"C'", i8* %ldc_p, i8* %intcast.constant.int.014) -; CHECK-NEXT: %[[i87:.+]] = 
bitcast double* %tape.ext.A to i8* +; CHECK-NEXT: %[[i87:.+]] = bitcast double* %cache.A to i8* ; CHECK-NEXT: tail call void @free(i8* nonnull %[[i87]]) -; CHECK-NEXT: %[[i88:.+]] = bitcast double* %tape.ext.C4 to i8* +; CHECK-NEXT: %[[i88:.+]] = bitcast double* %cache.C to i8* ; CHECK-NEXT: tail call void @free(i8* nonnull %[[i88]]) ; CHECK-NEXT: ret void ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_lacpy_runtime_act.ll b/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_lacpy_runtime_act.ll index 7bb754529196..1aae37d5b5d7 100644 --- a/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_lacpy_runtime_act.ll +++ b/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_lacpy_runtime_act.ll @@ -113,17 +113,15 @@ entry: ; CHECK-NEXT: %cache.C = bitcast i8* %malloccall2 to double* ; CHECK-NEXT: store i8 0, i8* %byref.copy.garbage3 ; CHECK-NEXT: call void @dlacpy_64_(i8* %byref.copy.garbage3, i8* %m_p, i8* %n_p, i8* %C, i8* %ldc_p, double* %cache.C, i8* %m_p) -; CHECK-NEXT: %15 = insertvalue { double*, double* } undef, double* %cache.A, 0 -; CHECK-NEXT: %[[i16:.+]] = insertvalue { double*, double* } %15, double* %cache.C, 1 -; CHECK-NEXT: %17 = bitcast i8* %m_p to i64* -; CHECK-NEXT: %18 = load i64, i64* %17 -; CHECK-NEXT: %19 = bitcast i8* %n_p to i64* -; CHECK-NEXT: %20 = load i64, i64* %19 -; CHECK-NEXT: %size_AB = mul nuw i64 %18, %20 +; CHECK-NEXT: %[[i17:.+]] = bitcast i8* %m_p to i64* +; CHECK-NEXT: %[[i18:.+]] = load i64, i64* %[[i17]] +; CHECK-NEXT: %[[i19:.+]] = bitcast i8* %n_p to i64* +; CHECK-NEXT: %[[i20:.+]] = load i64, i64* %[[i19]] +; CHECK-NEXT: %size_AB = mul nuw i64 %[[i18]], %[[i20]] ; CHECK-NEXT: %mallocsize5 = mul nuw nsw i64 %size_AB, 8 ; CHECK-NEXT: %malloccall6 = tail call noalias nonnull i8* @malloc(i64 %mallocsize5) ; CHECK-NEXT: %mat_AB = bitcast i8* %malloccall6 to double* -; CHECK-NEXT: %21 = bitcast double* %mat_AB to i8* +; CHECK-NEXT: %[[i21:.+]] = bitcast double* %mat_AB to i8* ; CHECK-NEXT: call void @dgemm_64_(i8* %transa, 
i8* %transb, i8* %m_p, i8* %n_p, i8* %k_p, i8* %alpha, i8* %A, i8* %lda_p, i8* %B, i8* %ldb_p, i8* %beta, i8* %C, i8* %ldc_p) ; CHECK-NEXT: %"ptr'ipc" = bitcast i8* %"A'" to double* ; CHECK-NEXT: %ptr = bitcast i8* %A to double* @@ -132,12 +130,9 @@ entry: ; CHECK: invertentry: ; preds = %entry ; CHECK-NEXT: store double 0.000000e+00, double* %"ptr'ipc", align 8, !alias.scope !3, !noalias !0 -; CHECK-NEXT: %tape.ext.A = extractvalue { double*, double* } %[[i16]], 0 -; CHECK-NEXT: %[[matA:.+]] = bitcast double* %tape.ext.A to i8* -; CHECK-NEXT: %tape.ext.C = extractvalue { double*, double* } %[[i16]], 1 -; CHECK-NEXT: %[[matC0:.+]] = bitcast double* %tape.ext.C to i8* -; CHECK-NEXT: %tape.ext.C4 = extractvalue { double*, double* } %[[i16]], 1 -; CHECK-NEXT: %[[matC:.+]] = bitcast double* %tape.ext.C4 to i8* +; CHECK-NEXT: %[[matA:.+]] = bitcast double* %cache.A to i8* +; CHECK-NEXT: %[[matC0:.+]] = bitcast double* %cache.C to i8* +; CHECK-NEXT: %[[matC:.+]] = bitcast double* %cache.C to i8* ; CHECK-NEXT: %ld.transa = load i8, i8* %transa ; CHECK-DAG: %[[i25:.+]] = icmp eq i8 %ld.transa, 110 ; CHECK-DAG: %[[i26:.+]] = select i1 %[[i25]], i8 116, i8 0 @@ -169,12 +164,12 @@ entry: ; CHECK-DAG: %[[i41:.+]] = icmp eq i8 %loaded.trans7, 78 ; CHECK-DAG: %[[i42:.+]] = icmp eq i8 %loaded.trans7, 110 ; CHECK-NEXT: %[[i43:.+]] = or i1 %[[i42]], %[[i41]] -; CHECK-NEXT: %[[i44:.+]] = select i1 %43, i8* %m_p, i8* %k_p +; CHECK-NEXT: %[[i44:.+]] = select i1 %[[i43]], i8* %m_p, i8* %k_p ; CHECK-NEXT: store double 0.000000e+00, double* %byref.constant.fp.0.0 ; CHECK-NEXT: %fpcast.constant.fp.0.0 = bitcast double* %byref.constant.fp.0.0 to i8* -; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %transb, i8* %m_p, i8* %n_p, i8* %k_p, i8* %fpcast.constant.fp.1.0, i8* %22, i8* %[[i44]], i8* %B, i8* %ldb_p, i8* %fpcast.constant.fp.0.0, i8* %21, i8* %m_p) -; CHECK: %45 = bitcast i64* %byref.constant.one.i to i8* -; CHECK: %46 = bitcast i64* %byref.mat.size.i to i8* +; CHECK-NEXT: call 
void @dgemm_64_(i8* %transa, i8* %transb, i8* %m_p, i8* %n_p, i8* %k_p, i8* %fpcast.constant.fp.1.0, i8* %[[matA]], i8* %[[i44]], i8* %B, i8* %ldb_p, i8* %fpcast.constant.fp.0.0, i8* %[[i21]], i8* %m_p) +; CHECK: %[[i45:.+]] = bitcast i64* %byref.constant.one.i to i8* +; CHECK: %[[i46:.+]] = bitcast i64* %byref.mat.size.i to i8* ; CHECK: store i64 1, i64* %byref.constant.one.i ; CHECK-NEXT: %intcast.constant.one.i = bitcast i64* %byref.constant.one.i to i8* ; CHECK-DAG: %[[i47:.+]] = load i64, i64* %m @@ -192,34 +187,34 @@ entry: ; CHECK-NEXT: br i1 %[[i52]], label %fast.path.i, label %for.body.i ; CHECK: fast.path.i: ; preds = %init.idx.i -; CHECK-NEXT: %[[i53:.+]] = call fast double @ddot_64_(i8* %intcast.mat.size.i, i8* %"C'", i8* %intcast.constant.one.i, i8* %21, i8* %intcast.constant.one.i) +; CHECK-NEXT: %[[i53:.+]] = call fast double @ddot_64_(i8* %intcast.mat.size.i, i8* %"C'", i8* %intcast.constant.one.i, i8* %[[i21]], i8* %intcast.constant.one.i) ; CHECK-NEXT: br label %__enzyme_inner_prodd_64_.exit ; CHECK: for.body.i: ; preds = %for.body.i, %init.idx.i ; CHECK-NEXT: %Aidx.i = phi i64 [ 0, %init.idx.i ], [ %Aidx.next.i, %for.body.i ] ; CHECK-NEXT: %Bidx.i = phi i64 [ 0, %init.idx.i ], [ %Bidx.next.i, %for.body.i ] ; CHECK-NEXT: %iteration.i = phi i64 [ 0, %init.idx.i ], [ %iter.next.i, %for.body.i ] -; CHECK-NEXT: %sum.i = phi{{( fast)?}} double [ 0.000000e+00, %init.idx.i ], [ %57, %for.body.i ] -; CHECK-NEXT: %A.i.i = getelementptr inbounds double, double* %51, i64 %Aidx.i +; CHECK-NEXT: %sum.i = phi{{( fast)?}} double [ 0.000000e+00, %init.idx.i ], [ %[[i57:.+]], %for.body.i ] +; CHECK-NEXT: %A.i.i = getelementptr inbounds double, double* %[[i51]], i64 %Aidx.i ; CHECK-NEXT: %B.i.i = getelementptr inbounds double, double* %mat_AB, i64 %Bidx.i -; CHECK-NEXT: %54 = bitcast double* %A.i.i to i8* -; CHECK-NEXT: %55 = bitcast double* %B.i.i to i8* -; CHECK-NEXT: %56 = call fast double @ddot_64_(i8* %m_p, i8* %54, i8* %intcast.constant.one.i, i8* %55, i8* 
%intcast.constant.one.i) -; CHECK-NEXT: %Aidx.next.i = add nuw i64 %Aidx.i, %50 -; CHECK-NEXT: %Bidx.next.i = add nuw i64 %Aidx.i, %47 +; CHECK-NEXT: %[[i54:.+]] = bitcast double* %A.i.i to i8* +; CHECK-NEXT: %[[i55:.+]] = bitcast double* %B.i.i to i8* +; CHECK-NEXT: %[[i56:.+]] = call fast double @ddot_64_(i8* %m_p, i8* %[[i54]], i8* %intcast.constant.one.i, i8* %[[i55]], i8* %intcast.constant.one.i) +; CHECK-NEXT: %Aidx.next.i = add nuw i64 %Aidx.i, %[[i50]] +; CHECK-NEXT: %Bidx.next.i = add nuw i64 %Aidx.i, %[[i47]] ; CHECK-NEXT: %iter.next.i = add i64 %iteration.i, 1 -; CHECK-NEXT: %57 = fadd fast double %sum.i, %56 -; CHECK-NEXT: %58 = icmp eq i64 %iteration.i, %48 -; CHECK-NEXT: br i1 %58, label %__enzyme_inner_prodd_64_.exit, label %for.body.i +; CHECK-NEXT: %[[i57]] = fadd fast double %sum.i, %[[i56]] +; CHECK-NEXT: %[[i58:.+]] = icmp eq i64 %iteration.i, %[[i48]] +; CHECK-NEXT: br i1 %[[i58]], label %__enzyme_inner_prodd_64_.exit, label %for.body.i ; CHECK: __enzyme_inner_prodd_64_.exit: ; preds = %invertentry.alpha.active, %fast.path.i, %for.body.i ; CHECK-NEXT: %res.i = phi double [ 0.000000e+00, %invertentry.alpha.active ], [ %sum.i, %for.body.i ], [ %[[i53]], %fast.path.i ] -; CHECK-NEXT: %59 = bitcast i64* %byref.constant.one.i to i8* -; CHECK: %60 = bitcast i64* %byref.mat.size.i to i8* -; CHECK: %61 = bitcast i8* %"alpha'" to double* -; CHECK-NEXT: %62 = load double, double* %61 -; CHECK-NEXT: %63 = fadd fast double %62, %res.i -; CHECK-NEXT: store double %63, double* %61 +; CHECK-NEXT: %[[i59:.+]] = bitcast i64* %byref.constant.one.i to i8* +; CHECK: %[[i60:.+]] = bitcast i64* %byref.mat.size.i to i8* +; CHECK: %[[i61:.+]] = bitcast i8* %"alpha'" to double* +; CHECK-NEXT: %[[i62:.+]] = load double, double* %[[i61]] +; CHECK-NEXT: %[[i63:.+]] = fadd fast double %[[i62]], %res.i +; CHECK-NEXT: store double %[[i63]], double* %[[i61]] ; CHECK-NEXT: br label %invertentry.alpha.done ; CHECK: invertentry.alpha.done: ; preds = 
%__enzyme_inner_prodd_64_.exit, %invertentry @@ -238,60 +233,60 @@ entry: ; CHECK-DAG: %[[i65:.+]] = icmp eq i8 %loaded.trans8, 110 ; CHECK-DAG: %[[i66:.+]] = or i1 %[[i65]], %[[i64]] ; CHECK-NEXT: %[[i67:.+]] = select i1 %[[i66]], i8* %m_p, i8* %k_p -; CHECK-NEXT: call void @dgemm_64_(i8* %byref.transpose.transa, i8* %transb, i8* %k_p, i8* %n_p, i8* %m_p, i8* %alpha, i8* %22, i8* %[[i67]], i8* %"C'", i8* %ldc_p, i8* %beta, i8* %"B'", i8* %ldb_p) +; CHECK-NEXT: call void @dgemm_64_(i8* %byref.transpose.transa, i8* %transb, i8* %k_p, i8* %n_p, i8* %m_p, i8* %alpha, i8* %[[matA]], i8* %[[i67]], i8* %"C'", i8* %ldc_p, i8* %beta, i8* %"B'", i8* %ldb_p) ; CHECK-NEXT: br label %invertentry.B.done ; CHECK: invertentry.B.done: ; preds = %invertentry.B.active, %invertentry.A.done ; CHECK-NEXT: br i1 %rt.inactive.beta, label %invertentry.beta.done, label %invertentry.beta.active ; CHECK: invertentry.beta.active: ; preds = %invertentry.B.done -; CHECK: %68 = bitcast i64* %byref.constant.one.i15 to i8* -; CHECK: %69 = bitcast i64* %byref.mat.size.i18 to i8* +; CHECK: %[[i68:.+]] = bitcast i64* %byref.constant.one.i15 to i8* +; CHECK: %[[i69:.+]] = bitcast i64* %byref.mat.size.i18 to i8* ; CHECK: store i64 1, i64* %byref.constant.one.i15 ; CHECK-NEXT: %intcast.constant.one.i16 = bitcast i64* %byref.constant.one.i15 to i8* -; CHECK-NEXT: %70 = load i64, i64* %m -; CHECK-NEXT: %71 = load i64, i64* %n -; CHECK-NEXT: %mat.size.i17 = mul nuw i64 %70, %71 +; CHECK-NEXT: %[[i70:.+]] = load i64, i64* %m +; CHECK-NEXT: %[[i71:.+]] = load i64, i64* %n +; CHECK-NEXT: %mat.size.i17 = mul nuw i64 %[[i70]], %[[i71]] ; CHECK-NEXT: store i64 %mat.size.i17, i64* %byref.mat.size.i18 ; CHECK-NEXT: %intcast.mat.size.i19 = bitcast i64* %byref.mat.size.i18 to i8* -; CHECK-NEXT: %72 = icmp eq i64 %mat.size.i17, 0 -; CHECK-NEXT: br i1 %72, label %__enzyme_inner_prodd_64_.exit33, label %init.idx.i20 +; CHECK-NEXT: %[[i72:.+]] = icmp eq i64 %mat.size.i17, 0 +; CHECK-NEXT: br i1 %[[i72]], label 
%__enzyme_inner_prodd_64_.exit33, label %init.idx.i20 ; CHECK: init.idx.i20: ; preds = %invertentry.beta.active -; CHECK-NEXT: %73 = load i64, i64* %ldc -; CHECK-NEXT: %74 = bitcast i8* %"C'" to double* -; CHECK-NEXT: %75 = icmp eq i64 %70, %73 -; CHECK-NEXT: br i1 %75, label %fast.path.i21, label %for.body.i31 +; CHECK-NEXT: %[[i73:.+]] = load i64, i64* %ldc +; CHECK-NEXT: %[[i74:.+]] = bitcast i8* %"C'" to double* +; CHECK-NEXT: %[[i75:.+]] = icmp eq i64 %[[i70]], %[[i73]] +; CHECK-NEXT: br i1 %[[i75]], label %fast.path.i21, label %for.body.i31 ; CHECK: fast.path.i21: ; preds = %init.idx.i20 -; CHECK-NEXT: %76 = call fast double @ddot_64_(i8* %intcast.mat.size.i19, i8* %"C'", i8* %intcast.constant.one.i16, i8* %23, i8* %intcast.constant.one.i16) +; CHECK-NEXT: %[[i76:.+]] = call fast double @ddot_64_(i8* %intcast.mat.size.i19, i8* %"C'", i8* %intcast.constant.one.i16, i8* %[[matC0]], i8* %intcast.constant.one.i16) ; CHECK-NEXT: br label %__enzyme_inner_prodd_64_.exit33 ; CHECK: for.body.i31: ; preds = %for.body.i31, %init.idx.i20 ; CHECK-NEXT: %Aidx.i22 = phi i64 [ 0, %init.idx.i20 ], [ %Aidx.next.i28, %for.body.i31 ] ; CHECK-NEXT: %Bidx.i23 = phi i64 [ 0, %init.idx.i20 ], [ %Bidx.next.i29, %for.body.i31 ] ; CHECK-NEXT: %iteration.i24 = phi i64 [ 0, %init.idx.i20 ], [ %iter.next.i30, %for.body.i31 ] -; CHECK-NEXT: %sum.i25 = phi{{( fast)?}} double [ 0.000000e+00, %init.idx.i20 ], [ %80, %for.body.i31 ] -; CHECK-NEXT: %A.i.i26 = getelementptr inbounds double, double* %74, i64 %Aidx.i22 -; CHECK-NEXT: %B.i.i27 = getelementptr inbounds double, double* %tape.ext.C, i64 %Bidx.i23 -; CHECK-NEXT: %77 = bitcast double* %A.i.i26 to i8* -; CHECK-NEXT: %78 = bitcast double* %B.i.i27 to i8* -; CHECK-NEXT: %79 = call fast double @ddot_64_(i8* %m_p, i8* %77, i8* %intcast.constant.one.i16, i8* %78, i8* %intcast.constant.one.i16) -; CHECK-NEXT: %Aidx.next.i28 = add nuw i64 %Aidx.i22, %73 -; CHECK-NEXT: %Bidx.next.i29 = add nuw i64 %Aidx.i22, %70 +; CHECK-NEXT: %sum.i25 = phi{{( 
fast)?}} double [ 0.000000e+00, %init.idx.i20 ], [ %[[i80:.+]], %for.body.i31 ] +; CHECK-NEXT: %A.i.i26 = getelementptr inbounds double, double* %[[i74]], i64 %Aidx.i22 +; CHECK-NEXT: %B.i.i27 = getelementptr inbounds double, double* %cache.C, i64 %Bidx.i23 +; CHECK-NEXT: %[[i77:.+]] = bitcast double* %A.i.i26 to i8* +; CHECK-NEXT: %[[i78:.+]] = bitcast double* %B.i.i27 to i8* +; CHECK-NEXT: %[[i79:.+]] = call fast double @ddot_64_(i8* %m_p, i8* %[[i77]], i8* %intcast.constant.one.i16, i8* %[[i78]], i8* %intcast.constant.one.i16) +; CHECK-NEXT: %Aidx.next.i28 = add nuw i64 %Aidx.i22, %[[i73]] +; CHECK-NEXT: %Bidx.next.i29 = add nuw i64 %Aidx.i22, %[[i70]] ; CHECK-NEXT: %iter.next.i30 = add i64 %iteration.i24, 1 -; CHECK-NEXT: %80 = fadd fast double %sum.i25, %79 -; CHECK-NEXT: %81 = icmp eq i64 %iteration.i24, %71 -; CHECK-NEXT: br i1 %81, label %__enzyme_inner_prodd_64_.exit33, label %for.body.i31 +; CHECK-NEXT: %[[i80]] = fadd fast double %sum.i25, %[[i79]] +; CHECK-NEXT: %[[i81:.+]] = icmp eq i64 %iteration.i24, %[[i71]] +; CHECK-NEXT: br i1 %[[i81]], label %__enzyme_inner_prodd_64_.exit33, label %for.body.i31 ; CHECK: __enzyme_inner_prodd_64_.exit33: ; preds = %invertentry.beta.active, %fast.path.i21, %for.body.i31 -; CHECK-NEXT: %res.i32 = phi double [ 0.000000e+00, %invertentry.beta.active ], [ %sum.i25, %for.body.i31 ], [ %76, %fast.path.i21 ] -; CHECK-NEXT: %82 = bitcast i64* %byref.constant.one.i15 to i8* -; CHECK: %83 = bitcast i64* %byref.mat.size.i18 to i8* -; CHECK: %84 = bitcast i8* %"beta'" to double* -; CHECK-NEXT: %85 = load double, double* %84 -; CHECK-NEXT: %86 = fadd fast double %85, %res.i32 -; CHECK-NEXT: store double %86, double* %84 +; CHECK-NEXT: %res.i32 = phi double [ 0.000000e+00, %invertentry.beta.active ], [ %sum.i25, %for.body.i31 ], [ %[[i76]], %fast.path.i21 ] +; CHECK-NEXT: %[[i82:.+]] = bitcast i64* %byref.constant.one.i15 to i8* +; CHECK: %[[i83:.+]] = bitcast i64* %byref.mat.size.i18 to i8* +; CHECK: %[[i84:.+]] = bitcast i8* 
%"beta'" to double* +; CHECK-NEXT: %[[i85:.+]] = load double, double* %[[i84]] +; CHECK-NEXT: %[[i86:.+]] = fadd fast double %[[i85]], %res.i32 +; CHECK-NEXT: store double %[[i86]], double* %[[i84]] ; CHECK-NEXT: br label %invertentry.beta.done ; CHECK: invertentry.beta.done: ; preds = %__enzyme_inner_prodd_64_.exit33, %invertentry.B.done @@ -311,9 +306,9 @@ entry: ; CHECK-NEXT: br label %invertentry.C.done ; CHECK: invertentry.C.done: ; preds = %invertentry.C.active, %invertentry.beta.done -; CHECK-NEXT: %[[i87:.+]] = bitcast double* %tape.ext.A to i8* +; CHECK-NEXT: %[[i87:.+]] = bitcast double* %cache.A to i8* ; CHECK-NEXT: tail call void @free(i8* nonnull %[[i87]]) -; CHECK-NEXT: %[[i88:.+]] = bitcast double* %tape.ext.C4 to i8* +; CHECK-NEXT: %[[i88:.+]] = bitcast double* %cache.C to i8* ; CHECK-NEXT: tail call void @free(i8* nonnull %[[i88]]) ; CHECK-NEXT: ret void ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_loop.ll b/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_loop.ll index ffea78c6efa5..ca9e23085eaf 100644 --- a/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_loop.ll +++ b/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_loop.ll @@ -187,15 +187,6 @@ entry: ; CHECK-NEXT: br i1 %23, label %__enzyme_memcpy_double_mat_64.exit, label %init.idx.i ; CHECK: __enzyme_memcpy_double_mat_64.exit: ; preds = %loop, %init.end.i -; CHECK-NEXT: %[[i19:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } undef, i8 %avld.transa, 0 -; CHECK-NEXT: %[[i20:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[i19]], i8 %avld.transb, 1 -; CHECK-NEXT: %[[i21:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[i20]], i64 %avld.k, 2 -; CHECK-NEXT: %[[i22:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[i21]], double %avld.alpha, 3 -; CHECK-NEXT: %[[i23:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[i22]], i64 %avld.lda, 4 -; 
CHECK-NEXT: %[[i24:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[i23]], i64 %avld.ldb, 5 -; CHECK-NEXT: %[[i25:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[i24]], double %avld.beta, 6 -; CHECK-NEXT: %[[i26:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[i25]], i64 %avld.ldc, 7 -; CHECK-NEXT: %[[i27:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[i26]], double* %cache.A, 8 ; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %transb, i8* %m_p, i8* %n_p, i8* %k_p, i8* %alpha_p, i8* %A, i8* %lda_p, i8* %B, i8* %ldb_p, i8* %beta_p, i8* %C, i8* %ldc_p) ; CHECK-NEXT: call void @free(i8* %m_p) ; CHECK-NEXT: %cmp = icmp eq i64 %iv.next, 10 @@ -205,32 +196,32 @@ entry: ; CHECK-NEXT: br label %invertexit ; CHECK: invertentry: ; preds = %invertloop -; CHECK-NEXT: %33 = load i64, i64* %"iv'ac" +; CHECK-NEXT: %[[i33:.+]] = load i64, i64* %"iv'ac" ; CHECK-NEXT: %forfree = load i8**, i8*** %malloccall_cache, align 8, !dereferenceable !15, !invariant.group !2 -; CHECK-NEXT: %34 = bitcast i8** %forfree to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %34), !enzyme_cache_free !0 -; CHECK-NEXT: %35 = load i64, i64* %"iv'ac" +; CHECK-NEXT: %[[i34:.+]] = bitcast i8** %forfree to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i34]]), !enzyme_cache_free !0 +; CHECK-NEXT: %[[i35:.+]] = load i64, i64* %"iv'ac" ; CHECK-NEXT: %[[forfree15:.+]] = load i8**, i8*** %lda_p_cache, align 8, !dereferenceable !15, !invariant.group !5 -; CHECK-NEXT: %36 = bitcast i8** %[[forfree15]] to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %36), !enzyme_cache_free !3 +; CHECK-NEXT: %[[i36:.+]] = bitcast i8** %[[forfree15]] to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i36]]), !enzyme_cache_free !3 ; CHECK-NEXT: ret void ; CHECK: invertloop: ; preds = %remat_loop_loop, %remat_loop_loop -; CHECK-NEXT: %37 = load i64, i64* %"iv'ac" -; CHECK-NEXT: %38 = load 
i8**, i8*** %malloccall_cache, align 8, !dereferenceable !15, !invariant.group !2 -; CHECK-NEXT: %39 = getelementptr inbounds i8*, i8** %38, i64 %37 -; CHECK-NEXT: %40 = load i8*, i8** %39, align 8, !invariant.group !16 -; CHECK-NEXT: %cache.A_unwrap = bitcast i8* %40 to double* +; CHECK-NEXT: %[[i37:.+]] = load i64, i64* %"iv'ac" +; CHECK-NEXT: %[[i38:.+]] = load i8**, i8*** %malloccall_cache, align 8, !dereferenceable !15, !invariant.group !2 +; CHECK-NEXT: %[[i39:.+]] = getelementptr inbounds i8*, i8** %[[i38]], i64 %[[i37]] +; CHECK-NEXT: %[[i40:.+]] = load i8*, i8** %[[i39]], align 8, !invariant.group !16 +; CHECK-NEXT: %cache.A_unwrap = bitcast i8* %[[i40]] to double* ; CHECK-NEXT: %pcld.ldc_unwrap = bitcast i8* %ldc_p to i64* ; CHECK-NEXT: %avld.ldc_unwrap = load i64, i64* %pcld.ldc_unwrap ; CHECK-NEXT: %pcld.beta_unwrap = bitcast i8* %beta_p to double* ; CHECK-NEXT: %avld.beta_unwrap = load double, double* %pcld.beta_unwrap ; CHECK-NEXT: %pcld.ldb_unwrap = bitcast i8* %ldb_p to i64* ; CHECK-NEXT: %avld.ldb_unwrap = load i64, i64* %pcld.ldb_unwrap -; CHECK-NEXT: %41 = load i8**, i8*** %lda_p_cache, align 8, !dereferenceable !15, !invariant.group !5 -; CHECK-NEXT: %42 = getelementptr inbounds i8*, i8** %41, i64 %37 -; CHECK-NEXT: %43 = load i8*, i8** %42, align 8, !invariant.group !17 -; CHECK-NEXT: %pcld.lda_unwrap = bitcast i8* %43 to i64* +; CHECK-NEXT: %[[i41:.+]] = load i8**, i8*** %lda_p_cache, align 8, !dereferenceable !15, !invariant.group !5 +; CHECK-NEXT: %[[i42:.+]] = getelementptr inbounds i8*, i8** %[[i41]], i64 %[[i37]] +; CHECK-NEXT: %[[i43:.+]] = load i8*, i8** %[[i42]], align 8, !invariant.group !17 +; CHECK-NEXT: %pcld.lda_unwrap = bitcast i8* %[[i43]] to i64* ; CHECK-NEXT: %avld.lda_unwrap = load i64, i64* %pcld.lda_unwrap ; CHECK-NEXT: %pcld.alpha_unwrap = bitcast i8* %alpha_p to double* ; CHECK-NEXT: %avld.alpha_unwrap = load double, double* %pcld.alpha_unwrap @@ -238,42 +229,24 @@ entry: ; CHECK-NEXT: %avld.k_unwrap = load i64, i64* 
%pcld.k_unwrap ; CHECK-NEXT: %avld.transb_unwrap = load i8, i8* %transb ; CHECK-NEXT: %avld.transa_unwrap = load i8, i8* %transa -; CHECK-NEXT: %_unwrap = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } undef, i8 %avld.transa_unwrap, 0 -; CHECK-NEXT: %[[_unwrap22:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %_unwrap, i8 %avld.transb_unwrap, 1 -; CHECK-NEXT: %[[_unwrap23:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap22]], i64 %avld.k_unwrap, 2 -; CHECK-NEXT: %[[_unwrap24:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap23]], double %avld.alpha_unwrap, 3 -; CHECK-NEXT: %[[_unwrap25:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap24]], i64 %avld.lda_unwrap, 4 -; CHECK-NEXT: %[[_unwrap26:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap25]], i64 %avld.ldb_unwrap, 5 -; CHECK-NEXT: %[[_unwrap27:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap26]], double %avld.beta_unwrap, 6 -; CHECK-NEXT: %[[_unwrap28:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap27]], i64 %avld.ldc_unwrap, 7 -; CHECK-NEXT: %[[_unwrap29:.+]] = insertvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap28]], double* %cache.A_unwrap, 8 -; CHECK-NEXT: %tape.ext.transa = extractvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap29]], 0 -; CHECK-NEXT: store i8 %tape.ext.transa, i8* %byref.transa -; CHECK-NEXT: %tape.ext.transb = extractvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap29]], 1 -; CHECK-NEXT: store i8 %tape.ext.transb, i8* %byref.transb -; CHECK-NEXT: %tape.ext.k = extractvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap29]], 2 -; CHECK-NEXT: store i64 %tape.ext.k, i64* %byref.k +; CHECK-NEXT: store i8 %avld.transa_unwrap, i8* %byref.transa 
+; CHECK-NEXT: store i8 %avld.transb_unwrap, i8* %byref.transb +; CHECK-NEXT: store i64 %avld.k_unwrap, i64* %byref.k ; CHECK-NEXT: %cast.k = bitcast i64* %byref.k to i8* -; CHECK-NEXT: %tape.ext.alpha = extractvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap29]], 3 -; CHECK-NEXT: store double %tape.ext.alpha, double* %byref.alpha +; CHECK-NEXT: store double %avld.alpha_unwrap, double* %byref.alpha ; CHECK-NEXT: %cast.alpha = bitcast double* %byref.alpha to i8* -; CHECK-NEXT: %tape.ext.lda = extractvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap29]], 4 -; CHECK-NEXT: store i64 %tape.ext.lda, i64* %byref.lda +; CHECK-NEXT: store i64 %avld.lda_unwrap, i64* %byref.lda ; CHECK-NEXT: %cast.lda = bitcast i64* %byref.lda to i8* -; CHECK-NEXT: %tape.ext.ldb = extractvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap29]], 5 -; CHECK-NEXT: store i64 %tape.ext.ldb, i64* %byref.ldb +; CHECK-NEXT: store i64 %avld.ldb_unwrap, i64* %byref.ldb ; CHECK-NEXT: %cast.ldb = bitcast i64* %byref.ldb to i8* -; CHECK-NEXT: %tape.ext.beta = extractvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap29]], 6 -; CHECK-NEXT: store double %tape.ext.beta, double* %byref.beta +; CHECK-NEXT: store double %avld.beta_unwrap, double* %byref.beta ; CHECK-NEXT: %cast.beta = bitcast double* %byref.beta to i8* -; CHECK-NEXT: %tape.ext.ldc = extractvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap29]], 7 -; CHECK-NEXT: store i64 %tape.ext.ldc, i64* %byref.ldc +; CHECK-NEXT: store i64 %avld.ldc_unwrap, i64* %byref.ldc ; CHECK-NEXT: %cast.ldc = bitcast i64* %byref.ldc to i8* -; CHECK-NEXT: %tape.ext.A = extractvalue { i8, i8, i64, double, i64, i64, double, i64, double* } %[[_unwrap29]], 8 -; CHECK-NEXT: %44 = bitcast double* %tape.ext.A to i8* -; CHECK-NEXT: %45 = load i64, i64* %"iv'ac" -; CHECK-NEXT: %46 = load i8*, i8** %m_p_cache, align 8, !invariant.group !6 -; CHECK-NEXT: %47 = load i64, 
i64* %"iv'ac" +; CHECK-NEXT: %[[i44:.+]] = bitcast double* %cache.A_unwrap to i8* +; CHECK-NEXT: %[[i45:.+]] = load i64, i64* %"iv'ac" +; CHECK-NEXT: %[[i46:.+]] = load i8*, i8** %m_p_cache, align 8, !invariant.group !6 +; CHECK-NEXT: %[[i47:.+]] = load i64, i64* %"iv'ac" ; CHECK-NEXT: %n_p_unwrap = bitcast i64* %n to i8* ; CHECK-NEXT: %ld.transa = load i8, i8* %byref.transa ; CHECK-DAG: %[[r0:.+]] = icmp eq i8 %ld.transa, 110 @@ -301,8 +274,8 @@ entry: ; CHECK-DAG: %[[r18:.+]] = icmp eq i8 %loaded.trans30, 78 ; CHECK-DAG: %[[r19:.+]] = icmp eq i8 %loaded.trans30, 110 ; CHECK-DAG: %[[r20:.+]] = or i1 %[[r19]], %[[r18]] -; CHECK-DAG: %[[r21:.+]] = select i1 %[[r20]], i8* %46, i8* %cast.k -; CHECK-NEXT: call void @dgemm_64_(i8* %byref.transpose.transa, i8* %byref.transb, i8* %cast.k, i8* %n_p_unwrap, i8* %46, i8* %cast.alpha, i8* %44, i8* %[[r21]], i8* %"C'", i8* %cast.ldc, i8* %cast.beta, i8* %"B'", i8* %cast.ldb) +; CHECK-DAG: %[[r21:.+]] = select i1 %[[r20]], i8* %[[i46]], i8* %cast.k +; CHECK-NEXT: call void @dgemm_64_(i8* %byref.transpose.transa, i8* %byref.transb, i8* %cast.k, i8* %n_p_unwrap, i8* %[[i46]], i8* %cast.alpha, i8* %[[i44]], i8* %[[r21]], i8* %"C'", i8* %cast.ldc, i8* %cast.beta, i8* %"B'", i8* %cast.ldb) ; CHECK-NEXT: store i8 71, i8* %byref.constant.char.G ; CHECK-NEXT: store i64 0, i64* %byref.constant.int.0 ; CHECK-NEXT: %intcast.constant.int.0 = bitcast i64* %byref.constant.int.0 to i8* @@ -312,19 +285,19 @@ entry: ; CHECK-NEXT: %fpcast.constant.fp.1.0 = bitcast double* %byref.constant.fp.1.0 to i8* ; CHECK-NEXT: store i64 0, i64* %[[byrefconstantint33]] ; CHECK-NEXT: %intcast.constant.int.034 = bitcast i64* %[[byrefconstantint33]] to i8* -; CHECK-NEXT: call void @dlascl_64_(i8* %byref.constant.char.G, i8* %intcast.constant.int.0, i8* %intcast.constant.int.032, i8* %fpcast.constant.fp.1.0, i8* %cast.beta, i8* %46, i8* %n_p_unwrap, i8* %"C'", i8* %cast.ldc, i8* %intcast.constant.int.034) -; CHECK-NEXT: %68 = bitcast double* %tape.ext.A to i8* 
-; CHECK-NEXT: tail call void @free(i8* nonnull %68) -; CHECK-NEXT: call void @free(i8* %46) -; CHECK-NEXT: %69 = load i64, i64* %"iv'ac" -; CHECK-NEXT: %70 = icmp eq i64 %69, 0 -; CHECK-NEXT: %71 = xor i1 %70, true -; CHECK-NEXT: br i1 %70, label %invertentry, label %incinvertloop +; CHECK-NEXT: call void @dlascl_64_(i8* %byref.constant.char.G, i8* %intcast.constant.int.0, i8* %intcast.constant.int.032, i8* %fpcast.constant.fp.1.0, i8* %cast.beta, i8* %[[i46]], i8* %n_p_unwrap, i8* %"C'", i8* %cast.ldc, i8* %intcast.constant.int.034) +; CHECK-NEXT: %[[i68:.+]] = bitcast double* %cache.A_unwrap to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i68]]) +; CHECK-NEXT: call void @free(i8* %[[i46]]) +; CHECK-NEXT: %[[i69:.+]] = load i64, i64* %"iv'ac" +; CHECK-NEXT: %[[i70:.+]] = icmp eq i64 %[[i69]], 0 +; CHECK-NEXT: %[[i71:.+]] = xor i1 %[[i70]], true +; CHECK-NEXT: br i1 %[[i70]], label %invertentry, label %incinvertloop ; CHECK: incinvertloop: ; preds = %invertloop -; CHECK-NEXT: %72 = load i64, i64* %"iv'ac" -; CHECK-NEXT: %73 = add nsw i64 %72, -1 -; CHECK-NEXT: store i64 %73, i64* %"iv'ac" +; CHECK-NEXT: %[[i72:.+]] = load i64, i64* %"iv'ac" +; CHECK-NEXT: %[[i73:.+]] = add nsw i64 %[[i72]], -1 +; CHECK-NEXT: store i64 %[[i73]], i64* %"iv'ac" ; CHECK-NEXT: br label %remat_enter ; CHECK: invertexit: ; preds = %exit @@ -340,12 +313,12 @@ entry: ; CHECK: remat_loop_loop: ; preds = %remat_enter ; CHECK-NEXT: %remat_m_p = call i8* @malloc(i64 8) ; CHECK-NEXT: store i8* %remat_m_p, i8** %m_p_cache, align 8, !invariant.group !6 -; CHECK-NEXT: %74 = load i64, i64* %"iv'ac" -; CHECK-NEXT: %75 = load i8*, i8** %m_p_cache, align 8, !invariant.group !6 -; CHECK-NEXT: %m_unwrap = bitcast i8* %75 to i64* +; CHECK-NEXT: %[[i74:.+]] = load i64, i64* %"iv'ac" +; CHECK-NEXT: %[[i75:.+]] = load i8*, i8** %m_p_cache, align 8, !invariant.group !6 +; CHECK-NEXT: %m_unwrap = bitcast i8* %[[i75]] to i64* ; CHECK-NEXT: store i64 4, i64* %m_unwrap, align 16 -; CHECK-NEXT: %76 = 
load i64, i64* %"iv'ac" -; CHECK-NEXT: %iv.next_unwrap = add nuw nsw i64 %76, 1 +; CHECK-NEXT: %[[i76:.+]] = load i64, i64* %"iv'ac" +; CHECK-NEXT: %iv.next_unwrap = add nuw nsw i64 %[[i76]], 1 ; CHECK-NEXT: %cmp_unwrap = icmp eq i64 %iv.next_unwrap, 10 ; CHECK-NEXT: br i1 %cmp_unwrap, label %invertloop, label %invertloop ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_transpose_lacpy.ll b/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_transpose_lacpy.ll index 08a7361985a4..7241127eb8bb 100644 --- a/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_transpose_lacpy.ll +++ b/enzyme/test/Enzyme/ReverseMode/blas/gemm_f_c_transpose_lacpy.ll @@ -120,8 +120,6 @@ entry: ; CHECK-NEXT: %cache.B = bitcast i8* %[[malloccall2]] to double* ; CHECK-NEXT: store i8 0, i8* %byref.copy.garbage4 ; CHECK-NEXT: call void @dlacpy_64_(i8* %[[byrefgarbage2]], i8* %13, i8* %14, i8* %B, i8* %ldb_p, double* %cache.B, i8* %13) -; CHECK-NEXT: %[[i22:.+]] = insertvalue { double*, double* } undef, double* %cache.A, 0 -; CHECK-NEXT: %[[i23:.+]] = insertvalue { double*, double* } %[[i22]], double* %cache.B, 1 ; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %transb, i8* %m_p, i8* %n_p, i8* %k_p, i8* %alpha_p, i8* %A, i8* %lda_p, i8* %B, i8* %ldb_p, i8* %beta_p, i8* %C, i8* %ldc_p) ; CHECK-NEXT: %"ptr'ipc" = bitcast i8* %"A'" to double* ; CHECK-NEXT: %ptr = bitcast i8* %A to double* @@ -130,10 +128,8 @@ entry: ; CHECK: invertentry: ; preds = %entry ; CHECK-NEXT: store double 0.000000e+00, double* %"ptr'ipc", align 8, !alias.scope !3, !noalias !0 -; CHECK-NEXT: %tape.ext.A = extractvalue { double*, double* } %[[i23]], 0 -; CHECK-NEXT: %[[i24:.+]] = bitcast double* %tape.ext.A to i8* -; CHECK-NEXT: %tape.ext.B = extractvalue { double*, double* } %[[i23]], 1 -; CHECK-NEXT: %[[i25:.+]] = bitcast double* %tape.ext.B to i8* +; CHECK-NEXT: %[[i24:.+]] = bitcast double* %cache.A to i8* +; CHECK-NEXT: %[[i25:.+]] = bitcast double* %cache.B to i8* ; CHECK-NEXT: %ld.transa = load i8, i8* 
%transa ; CHECK-DAG: %[[i26:.+]] = icmp eq i8 %ld.transa, 110 ; CHECK-DAG: %[[i27:.+]] = select i1 %[[i26]], i8 116, i8 0 @@ -159,9 +155,9 @@ entry: ; CHECK-NEXT: %loaded.trans5 = load i8, i8* %transb ; CHECK-DAG: %[[i40:.+]] = icmp eq i8 %loaded.trans5, 78 ; CHECK-DAG: %[[i41:.+]] = icmp eq i8 %loaded.trans5, 110 -; CHECK-NEXT: %42 = or i1 %[[i41]], %[[i40]] -; CHECK-NEXT: %43 = select i1 %42, i8* %k_p, i8* %n_p -; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %byref.transpose.transb, i8* %m_p, i8* %k_p, i8* %n_p, i8* %alpha_p, i8* %"C'", i8* %ldc_p, i8* %[[i25]], i8* %43, i8* %beta_p, i8* %"A'", i8* %lda_p) +; CHECK-NEXT: %[[i42:.+]] = or i1 %[[i41]], %[[i40]] +; CHECK-NEXT: %[[i43:.+]] = select i1 %[[i42]], i8* %k_p, i8* %n_p +; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %byref.transpose.transb, i8* %m_p, i8* %k_p, i8* %n_p, i8* %alpha_p, i8* %"C'", i8* %ldc_p, i8* %[[i25]], i8* %[[i43]], i8* %beta_p, i8* %"A'", i8* %lda_p) ; CHECK-NEXT: %[[cachedtrans2:.+]] = load i8, i8* %transa ; CHECK-DAG: %[[i54:.+]] = icmp eq i8 %[[cachedtrans2]], 78 ; CHECK-DAG: %[[i55:.+]] = icmp eq i8 %[[cachedtrans2]], 110 @@ -178,9 +174,9 @@ entry: ; CHECK-NEXT: store i64 0, i64* %[[int05]] ; CHECK-NEXT: %[[intcast010:.+]] = bitcast i64* %[[int05]] to i8* ; CHECK-NEXT: call void @dlascl_64_(i8* %byref.constant.char.G, i8* %[[intcast0]], i8* %[[intcast08]], i8* %fpcast.constant.fp.1.0, i8* %beta_p, i8* %m_p, i8* %n_p, i8* %"C'", i8* %ldc_p, i8* %[[intcast010]]) -; CHECK-NEXT: %[[free1:.+]] = bitcast double* %tape.ext.A to i8* +; CHECK-NEXT: %[[free1:.+]] = bitcast double* %cache.A to i8* ; CHECK-NEXT: tail call void @free(i8* nonnull %[[free1]]) -; CHECK-NEXT: %[[free2:.+]] = bitcast double* %tape.ext.B to i8* +; CHECK-NEXT: %[[free2:.+]] = bitcast double* %cache.B to i8* ; CHECK-NEXT: tail call void @free(i8* nonnull %[[free2]]) ; CHECK-NEXT: ret void ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop.ll 
b/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop.ll index b7f39662552c..21807cc0b4de 100644 --- a/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop.ll +++ b/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop.ll @@ -67,30 +67,28 @@ entry: ; CHECK-NEXT: store i8* %malloccall, i8** %5, align 8, !invariant.group !8 ; CHECK-NEXT: %cache.x = bitcast i8* %malloccall2 to double* ; CHECK-NEXT: call void @cblas_dcopy(i32 %1, double* %x0, i32 1, double* %cache.x, i32 1) -; CHECK-NEXT: %6 = insertvalue { double*, double* } undef, double* %cache.A, 0 -; CHECK-NEXT: %7 = insertvalue { double*, double* } %6, double* %cache.x, 1 ; CHECK-NEXT: tail call void @cblas_dgemv(i32 noundef 101, i32 noundef 111, i32 noundef %N, i32 noundef %N, double noundef 1.000000e-03, double* noundef %K, i32 noundef %N, double* noundef %x0, i32 noundef 1, double noundef 1.000000e+00, double* noundef %v0, i32 noundef 1) ; CHECK-NEXT: %exitcond.not = icmp eq i64 %iv.next, 5000 ; CHECK-NEXT: br i1 %exitcond.not, label %for.cond.cleanup, label %for.body ; CHECK: invertentry: ; preds = %invertfor.body -; CHECK-NEXT: %8 = load i64, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i8:.+]] = load i64, i64* %"iv'ac", align 4 ; CHECK-NEXT: %forfree = load i8**, i8*** %malloccall2_cache, align 8, !dereferenceable !6, !invariant.group !2 -; CHECK-NEXT: %9 = bitcast i8** %forfree to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %9), !enzyme_cache_free !0 -; CHECK-NEXT: %10 = load i64, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i9:.+]] = bitcast i8** %forfree to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i9]]), !enzyme_cache_free !0 +; CHECK-NEXT: %[[i10:.+]] = load i64, i64* %"iv'ac", align 4 ; CHECK-NEXT: %forfree17 = load i8**, i8*** %malloccall_cache, align 8, !dereferenceable !6, !invariant.group !5 -; CHECK-NEXT: %11 = bitcast i8** %forfree17 to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %11), !enzyme_cache_free !3 +; CHECK-NEXT: %[[i11:.+]] = bitcast i8** %forfree17 to i8* +; CHECK-NEXT: tail 
call void @free(i8* nonnull %[[i11]]), !enzyme_cache_free !3 ; CHECK-NEXT: ret void ; CHECK: invertfor.cond.cleanup: ; preds = %for.cond.cleanup ; CHECK-NEXT: store double %differeturn, double* %"'de", align 8 -; CHECK-NEXT: %12 = load double, double* %"'de", align 8 +; CHECK-NEXT: %[[i12:.+]] = load double, double* %"'de", align 8 ; CHECK-NEXT: store double 0.000000e+00, double* %"'de", align 8 -; CHECK-NEXT: %13 = load double, double* %"x0'", align 8, !alias.scope !9, !noalias !12 -; CHECK-NEXT: %14 = fadd fast double %13, %12 -; CHECK-NEXT: store double %14, double* %"x0'", align 8, !alias.scope !9, !noalias !12 +; CHECK-NEXT: %[[i13:.+]] = load double, double* %"x0'", align 8, !alias.scope !9, !noalias !12 +; CHECK-NEXT: %[[i14:.+]] = fadd fast double %[[i13]], %[[i12]] +; CHECK-NEXT: store double %[[i14]], double* %"x0'", align 8, !alias.scope !9, !noalias !12 ; CHECK-NEXT: br label %mergeinvertfor.body_for.cond.cleanup ; CHECK: mergeinvertfor.body_for.cond.cleanup: ; preds = %invertfor.cond.cleanup @@ -98,31 +96,27 @@ entry: ; CHECK-NEXT: br label %invertfor.body ; CHECK: invertfor.body: ; preds = %incinvertfor.body, %mergeinvertfor.body_for.cond.cleanup -; CHECK-NEXT: %15 = load i64, i64* %"iv'ac", align 4 -; CHECK-NEXT: %16 = load i8**, i8*** %malloccall2_cache, align 8, !dereferenceable !6, !invariant.group !2 -; CHECK-NEXT: %17 = getelementptr inbounds i8*, i8** %16, i64 %15 -; CHECK-NEXT: %18 = load i8*, i8** %17, align 8, !invariant.group !7 -; CHECK-NEXT: %cache.x_unwrap = bitcast i8* %18 to double* -; CHECK-NEXT: %19 = load i8**, i8*** %malloccall_cache, align 8, !dereferenceable !6, !invariant.group !5 -; CHECK-NEXT: %20 = getelementptr inbounds i8*, i8** %19, i64 %15 -; CHECK-NEXT: %21 = load i8*, i8** %20, align 8, !invariant.group !8 -; CHECK-NEXT: %cache.A_unwrap = bitcast i8* %21 to double* -; CHECK-NEXT: %_unwrap = insertvalue { double*, double* } undef, double* %cache.A_unwrap, 0 -; CHECK-NEXT: %_unwrap24 = insertvalue { double*, double* } 
%_unwrap, double* %cache.x_unwrap, 1 -; CHECK-NEXT: %tape.ext.A = extractvalue { double*, double* } %_unwrap24, 0 -; CHECK-NEXT: %tape.ext.x = extractvalue { double*, double* } %_unwrap24, 1 -; CHECK-NEXT: %22 = bitcast double* %tape.ext.A to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %22) -; CHECK-NEXT: %23 = bitcast double* %tape.ext.x to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %23) -; CHECK-NEXT: %24 = load i64, i64* %"iv'ac", align 4 -; CHECK-NEXT: %25 = icmp eq i64 %24, 0 -; CHECK-NEXT: %26 = xor i1 %25, true -; CHECK-NEXT: br i1 %25, label %invertentry, label %incinvertfor.body +; CHECK-NEXT: %[[i15:.+]] = load i64, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i16:.+]] = load i8**, i8*** %malloccall2_cache, align 8, !dereferenceable !6, !invariant.group !2 +; CHECK-NEXT: %[[i17:.+]] = getelementptr inbounds i8*, i8** %[[i16]], i64 %[[i15]] +; CHECK-NEXT: %[[i18:.+]] = load i8*, i8** %[[i17]], align 8, !invariant.group !7 +; CHECK-NEXT: %cache.x_unwrap = bitcast i8* %[[i18]] to double* +; CHECK-NEXT: %[[i19:.+]] = load i8**, i8*** %malloccall_cache, align 8, !dereferenceable !6, !invariant.group !5 +; CHECK-NEXT: %[[i20:.+]] = getelementptr inbounds i8*, i8** %[[i19]], i64 %[[i15]] +; CHECK-NEXT: %[[i21:.+]] = load i8*, i8** %[[i20]], align 8, !invariant.group !8 +; CHECK-NEXT: %cache.A_unwrap = bitcast i8* %[[i21]] to double* +; CHECK-NEXT: %[[i22:.+]] = bitcast double* %cache.A_unwrap to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i22]]) +; CHECK-NEXT: %[[i23:.+]] = bitcast double* %cache.x_unwrap to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i23]]) +; CHECK-NEXT: %[[i24:.+]] = load i64, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i25:.+]] = icmp eq i64 %[[i24]], 0 +; CHECK-NEXT: %[[i26:.+]] = xor i1 %[[i25]], true +; CHECK-NEXT: br i1 %[[i25]], label %invertentry, label %incinvertfor.body ; CHECK: incinvertfor.body: ; preds = %invertfor.body -; CHECK-NEXT: %27 = load i64, i64* %"iv'ac", align 4 -; CHECK-NEXT: %28 = add 
nsw i64 %27, -1 -; CHECK-NEXT: store i64 %28, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i27:.+]] = load i64, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i28:.+]] = add nsw i64 %[[i27]], -1 +; CHECK-NEXT: store i64 %[[i28]], i64* %"iv'ac", align 4 ; CHECK-NEXT: br label %invertfor.body ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop2.ll b/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop2.ll index 659c997b6f6c..98dcca57f324 100644 --- a/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop2.ll +++ b/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop2.ll @@ -67,30 +67,28 @@ entry: ; CHECK-NEXT: store i8* %malloccall, i8** %5, align 8, !invariant.group !8 ; CHECK-NEXT: %cache.x = bitcast i8* %malloccall2 to double* ; CHECK-NEXT: call void @cblas_dcopy(i32 %1, double* %x0, i32 1, double* %cache.x, i32 1) -; CHECK-NEXT: %6 = insertvalue { double*, double* } undef, double* %cache.A, 0 -; CHECK-NEXT: %7 = insertvalue { double*, double* } %6, double* %cache.x, 1 ; CHECK-NEXT: tail call void @cblas_dgemv(i32 noundef 101, i32 noundef 111, i32 noundef %N, i32 noundef %N, double noundef 1.000000e-03, double* noundef %K, i32 noundef %N, double* noundef %x0, i32 noundef 1, double noundef 1.000000e+00, double* noundef %v0, i32 noundef 1) ; CHECK-NEXT: %exitcond.not = icmp eq i64 %iv.next, 5000 ; CHECK-NEXT: br i1 %exitcond.not, label %for.cond.cleanup, label %for.body ; CHECK: invertentry: ; preds = %invertfor.body -; CHECK-NEXT: %8 = load i64, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i8:.+]] = load i64, i64* %"iv'ac", align 4 ; CHECK-NEXT: %forfree = load i8**, i8*** %malloccall2_cache, align 8, !dereferenceable !6, !invariant.group !2 -; CHECK-NEXT: %9 = bitcast i8** %forfree to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %9), !enzyme_cache_free !0 -; CHECK-NEXT: %10 = load i64, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i9:.+]] = bitcast i8** %forfree to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i9]]), !enzyme_cache_free !0 +; CHECK-NEXT: 
%[[i10:.+]] = load i64, i64* %"iv'ac", align 4 ; CHECK-NEXT: %forfree17 = load i8**, i8*** %malloccall_cache, align 8, !dereferenceable !6, !invariant.group !5 -; CHECK-NEXT: %11 = bitcast i8** %forfree17 to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %11), !enzyme_cache_free !3 +; CHECK-NEXT: %[[i11:.+]] = bitcast i8** %forfree17 to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i11]]), !enzyme_cache_free !3 ; CHECK-NEXT: ret void ; CHECK: invertfor.cond.cleanup: ; preds = %for.cond.cleanup ; CHECK-NEXT: store double %differeturn, double* %"'de", align 8 -; CHECK-NEXT: %12 = load double, double* %"'de", align 8 +; CHECK-NEXT: %[[i12:.+]] = load double, double* %"'de", align 8 ; CHECK-NEXT: store double 0.000000e+00, double* %"'de", align 8 -; CHECK-NEXT: %13 = load double, double* %"x0'", align 8, !alias.scope !9, !noalias !12 -; CHECK-NEXT: %14 = fadd fast double %13, %12 -; CHECK-NEXT: store double %14, double* %"x0'", align 8, !alias.scope !9, !noalias !12 +; CHECK-NEXT: %[[i13:.+]] = load double, double* %"x0'", align 8, !alias.scope !9, !noalias !12 +; CHECK-NEXT: %[[i14:.+]] = fadd fast double %[[i13]], %[[i12]] +; CHECK-NEXT: store double %[[i14]], double* %"x0'", align 8, !alias.scope !9, !noalias !12 ; CHECK-NEXT: br label %mergeinvertfor.body_for.cond.cleanup ; CHECK: mergeinvertfor.body_for.cond.cleanup: ; preds = %invertfor.cond.cleanup @@ -98,36 +96,32 @@ entry: ; CHECK-NEXT: br label %invertfor.body ; CHECK: invertfor.body: ; preds = %incinvertfor.body, %mergeinvertfor.body_for.cond.cleanup -; CHECK-NEXT: %15 = load i64, i64* %"iv'ac", align 4 -; CHECK-NEXT: %16 = load i8**, i8*** %malloccall2_cache, align 8, !dereferenceable !6, !invariant.group !2 -; CHECK-NEXT: %17 = getelementptr inbounds i8*, i8** %16, i64 %15 -; CHECK-NEXT: %18 = load i8*, i8** %17, align 8, !invariant.group !7 -; CHECK-NEXT: %cache.x_unwrap = bitcast i8* %18 to double* -; CHECK-NEXT: %19 = load i8**, i8*** %malloccall_cache, align 8, !dereferenceable !6, 
!invariant.group !5 -; CHECK-NEXT: %20 = getelementptr inbounds i8*, i8** %19, i64 %15 -; CHECK-NEXT: %21 = load i8*, i8** %20, align 8, !invariant.group !8 -; CHECK-NEXT: %cache.A_unwrap = bitcast i8* %21 to double* -; CHECK-NEXT: %_unwrap = insertvalue { double*, double* } undef, double* %cache.A_unwrap, 0 -; CHECK-NEXT: %_unwrap24 = insertvalue { double*, double* } %_unwrap, double* %cache.x_unwrap, 1 -; CHECK-NEXT: %tape.ext.A = extractvalue { double*, double* } %_unwrap24, 0 -; CHECK-NEXT: %tape.ext.x = extractvalue { double*, double* } %_unwrap24, 1 -; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %tape.ext.x, i32 1, double* %"K'", i32 %N) -; CHECK-NEXT: %22 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32 %N, double 1.000000e-03, double* %tape.ext.A, i32 %22, double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) -; CHECK-NEXT: %23 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: call void @cblas_dscal(i32 %23, double 1.000000e+00, double* %"v0'", i32 1) -; CHECK-NEXT: %24 = bitcast double* %tape.ext.A to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %24) -; CHECK-NEXT: %25 = bitcast double* %tape.ext.x to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %25) -; CHECK-NEXT: %26 = load i64, i64* %"iv'ac", align 4 -; CHECK-NEXT: %27 = icmp eq i64 %26, 0 -; CHECK-NEXT: %28 = xor i1 %27, true -; CHECK-NEXT: br i1 %27, label %invertentry, label %incinvertfor.body +; CHECK-NEXT: %[[i15:.+]] = load i64, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i16:.+]] = load i8**, i8*** %malloccall2_cache, align 8, !dereferenceable !6, !invariant.group !2 +; CHECK-NEXT: %[[i17:.+]] = getelementptr inbounds i8*, i8** %[[i16]], i64 %[[i15]] +; CHECK-NEXT: %[[i18:.+]] = load i8*, i8** %[[i17]], align 8, !invariant.group !7 +; CHECK-NEXT: %cache.x_unwrap = bitcast i8* %[[i18]] to double* +; CHECK-NEXT: %[[i19:.+]] = load i8**, i8*** 
%malloccall_cache, align 8, !dereferenceable !6, !invariant.group !5 +; CHECK-NEXT: %[[i20:.+]] = getelementptr inbounds i8*, i8** %[[i19]], i64 %[[i15]] +; CHECK-NEXT: %[[i21:.+]] = load i8*, i8** %[[i20]], align 8, !invariant.group !8 +; CHECK-NEXT: %cache.A_unwrap = bitcast i8* %[[i21]] to double* +; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %cache.x_unwrap, i32 1, double* %"K'", i32 %N) +; CHECK-NEXT: %[[i22:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32 %N, double 1.000000e-03, double* %cache.A_unwrap, i32 %[[i22]], double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) +; CHECK-NEXT: %[[i23:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dscal(i32 %[[i23]], double 1.000000e+00, double* %"v0'", i32 1) +; CHECK-NEXT: %[[i24:.+]] = bitcast double* %cache.A_unwrap to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i24]]) +; CHECK-NEXT: %[[i25:.+]] = bitcast double* %cache.x_unwrap to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i25]]) +; CHECK-NEXT: %[[i26:.+]] = load i64, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i27:.+]] = icmp eq i64 %[[i26]], 0 +; CHECK-NEXT: %[[i28:.+]] = xor i1 %[[i27]], true +; CHECK-NEXT: br i1 %[[i27]], label %invertentry, label %incinvertfor.body ; CHECK: incinvertfor.body: ; preds = %invertfor.body -; CHECK-NEXT: %29 = load i64, i64* %"iv'ac", align 4 -; CHECK-NEXT: %30 = add nsw i64 %29, -1 -; CHECK-NEXT: store i64 %30, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i29:.+]] = load i64, i64* %"iv'ac", align 4 +; CHECK-NEXT: %[[i30:.+]] = add nsw i64 %[[i29]], -1 +; CHECK-NEXT: store i64 %[[i30]], i64* %"iv'ac", align 4 ; CHECK-NEXT: br label %invertfor.body ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop3_matcopy.ll b/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop3_matcopy.ll index 1cb751a00458..08bf25bfc360 100644 --- 
a/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop3_matcopy.ll +++ b/enzyme/test/Enzyme/ReverseMode/blas/gemv_c_loop3_matcopy.ll @@ -67,16 +67,14 @@ entry: ; CHECK-NEXT: %malloccall23 = tail call noalias nonnull i8* @malloc(i32 %mallocsize22) ; CHECK-NEXT: %cache.x24 = bitcast i8* %malloccall23 to double* ; CHECK-NEXT: call void @cblas_dcopy(i32 %8, double* %x0, i32 1, double* %cache.x24, i32 1) -; CHECK-NEXT: %9 = insertvalue { double*, double* } undef, double* %cache.A21, 0 -; CHECK-NEXT: %10 = insertvalue { double*, double* } %9, double* %cache.x24, 1 ; CHECK-NEXT: tail call void @cblas_dgemv(i32 noundef 101, i32 noundef 111, i32 noundef %N, i32 noundef %N, double noundef 1.000000e-03, double* noundef %K, i32 noundef %N, double* noundef %x0, i32 noundef 1, double noundef 1.000000e+00, double* noundef %v0, i32 noundef 1) -; CHECK-NEXT: %11 = mul i32 %N, %N -; CHECK-NEXT: %mallocsize11 = mul nuw nsw i32 %11, 8 +; CHECK-NEXT: %[[i11:.+]] = mul i32 %N, %N +; CHECK-NEXT: %mallocsize11 = mul nuw nsw i32 %[[i11]], 8 ; CHECK-NEXT: %malloccall12 = tail call noalias nonnull i8* @malloc(i32 %mallocsize11) ; CHECK-NEXT: %cache.A13 = bitcast i8* %malloccall12 to double* ; CHECK: %mul.i27 = add nuw nsw i32 %N, %N -; CHECK-NEXT: %12 = icmp eq i32 %mul.i27, 0 -; CHECK-NEXT: br i1 %12, label %__enzyme_memcpy_double_mat_32.exit38, label %init.idx.i29 +; CHECK-NEXT: %[[i12:.+]] = icmp eq i32 %mul.i27, 0 +; CHECK-NEXT: br i1 %[[i12]], label %__enzyme_memcpy_double_mat_32.exit38, label %init.idx.i29 ; CHECK: init.idx.i29: ; preds = %init.end.i37, %__enzyme_memcpy_double_mat_32.exit ; CHECK-NEXT: %j.i28 = phi i32 [ 0, %__enzyme_memcpy_double_mat_32.exit ], [ %j.next.i36, %init.end.i37 ] @@ -84,39 +82,37 @@ entry: ; CHECK: for.body.i35: ; preds = %for.body.i35, %init.idx.i29 ; CHECK-NEXT: %i.i30 = phi i32 [ 0, %init.idx.i29 ], [ %i.next.i34, %for.body.i35 ] -; CHECK-NEXT: %13 = mul nuw nsw i32 %j.i28, %N -; CHECK-NEXT: %14 = add nuw nsw i32 %i.i30, %13 -; CHECK-NEXT: %dst.i.i31 = 
getelementptr inbounds double, double* %cache.A13, i32 %14 -; CHECK-NEXT: %15 = mul nuw nsw i32 %j.i28, %N -; CHECK-NEXT: %16 = add nuw nsw i32 %i.i30, %15 -; CHECK-NEXT: %dst.i1.i32 = getelementptr inbounds double, double* %K, i32 %16 +; CHECK-NEXT: %[[i13:.+]] = mul nuw nsw i32 %j.i28, %N +; CHECK-NEXT: %[[i14:.+]] = add nuw nsw i32 %i.i30, %[[i13]] +; CHECK-NEXT: %dst.i.i31 = getelementptr inbounds double, double* %cache.A13, i32 %[[i14]] +; CHECK-NEXT: %[[i15:.+]] = mul nuw nsw i32 %j.i28, %N +; CHECK-NEXT: %[[i16:.+]] = add nuw nsw i32 %i.i30, %[[i15]] +; CHECK-NEXT: %dst.i1.i32 = getelementptr inbounds double, double* %K, i32 %[[i16]] ; CHECK-NEXT: %src.i.l.i33 = load double, double* %dst.i1.i32, align 8 ; CHECK-NEXT: store double %src.i.l.i33, double* %dst.i.i31, align 8 ; CHECK-NEXT: %i.next.i34 = add nuw nsw i32 %i.i30, 1 -; CHECK-NEXT: %17 = icmp eq i32 %i.next.i34, %N -; CHECK-NEXT: br i1 %17, label %init.end.i37, label %for.body.i35 +; CHECK-NEXT: %[[i17:.+]] = icmp eq i32 %i.next.i34, %N +; CHECK-NEXT: br i1 %[[i17]], label %init.end.i37, label %for.body.i35 ; CHECK: init.end.i37: ; preds = %for.body.i35 ; CHECK-NEXT: %j.next.i36 = add nuw nsw i32 %j.i28, 1 -; CHECK-NEXT: %18 = icmp eq i32 %j.next.i36, %N -; CHECK-NEXT: br i1 %18, label %__enzyme_memcpy_double_mat_32.exit38, label %init.idx.i29 +; CHECK-NEXT: %[[i18:.+]] = icmp eq i32 %j.next.i36, %N +; CHECK-NEXT: br i1 %[[i18]], label %__enzyme_memcpy_double_mat_32.exit38, label %init.idx.i29 ; CHECK: __enzyme_memcpy_double_mat_32.exit38: ; preds = %__enzyme_memcpy_double_mat_32.exit, %init.end.i37 -; CHECK-NEXT: %19 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: %mallocsize14 = mul nuw nsw i32 %19, 8 +; CHECK-NEXT: %[[i19:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: %mallocsize14 = mul nuw nsw i32 %[[i19]], 8 ; CHECK-NEXT: %malloccall15 = tail call noalias nonnull i8* @malloc(i32 %mallocsize14) ; CHECK-NEXT: %cache.x16 = bitcast i8* %malloccall15 to double* -; CHECK-NEXT: call void
@cblas_dcopy(i32 %19, double* %x0, i32 1, double* %cache.x16, i32 1) -; CHECK-NEXT: %20 = insertvalue { double*, double* } undef, double* %cache.A13, 0 -; CHECK-NEXT: %21 = insertvalue { double*, double* } %20, double* %cache.x16, 1 +; CHECK-NEXT: call void @cblas_dcopy(i32 %[[i19]], double* %x0, i32 1, double* %cache.x16, i32 1) ; CHECK-NEXT: tail call void @cblas_dgemv(i32 noundef 101, i32 noundef 111, i32 noundef %N, i32 noundef %N, double noundef 1.000000e-03, double* noundef %K, i32 noundef %N, double* noundef %x0, i32 noundef 1, double noundef 1.000000e+00, double* noundef %v0, i32 noundef 1) -; CHECK-NEXT: %22 = mul i32 %N, %N -; CHECK-NEXT: %mallocsize3 = mul nuw nsw i32 %22, 8 +; CHECK-NEXT: %[[i22:.+]] = mul i32 %N, %N +; CHECK-NEXT: %mallocsize3 = mul nuw nsw i32 %[[i22]], 8 ; CHECK-NEXT: %malloccall4 = tail call noalias nonnull i8* @malloc(i32 %mallocsize3) ; CHECK-NEXT: %cache.A5 = bitcast i8* %malloccall4 to double* ; CHECK: %mul.i39 = add nuw nsw i32 %N, %N -; CHECK-NEXT: %23 = icmp eq i32 %mul.i39, 0 -; CHECK-NEXT: br i1 %23, label %__enzyme_memcpy_double_mat_32.exit50, label %init.idx.i41 +; CHECK-NEXT: %[[i23:.+]] = icmp eq i32 %mul.i39, 0 +; CHECK-NEXT: br i1 %[[i23]], label %__enzyme_memcpy_double_mat_32.exit50, label %init.idx.i41 ; CHECK: init.idx.i41: ; preds = %init.end.i49, %__enzyme_memcpy_double_mat_32.exit38 ; CHECK-NEXT: %j.i40 = phi i32 [ 0, %__enzyme_memcpy_double_mat_32.exit38 ], [ %j.next.i48, %init.end.i49 ] @@ -124,39 +120,37 @@ entry: ; CHECK: for.body.i47: ; preds = %for.body.i47, %init.idx.i41 ; CHECK-NEXT: %i.i42 = phi i32 [ 0, %init.idx.i41 ], [ %i.next.i46, %for.body.i47 ] -; CHECK-NEXT: %24 = mul nuw nsw i32 %j.i40, %N -; CHECK-NEXT: %25 = add nuw nsw i32 %i.i42, %24 -; CHECK-NEXT: %dst.i.i43 = getelementptr inbounds double, double* %cache.A5, i32 %25 -; CHECK-NEXT: %26 = mul nuw nsw i32 %j.i40, %N -; CHECK-NEXT: %27 = add nuw nsw i32 %i.i42, %26 -; CHECK-NEXT: %dst.i1.i44 = getelementptr inbounds double, double* %K, i32 
%27 +; CHECK-NEXT: %[[i24:.+]] = mul nuw nsw i32 %j.i40, %N +; CHECK-NEXT: %[[i25:.+]] = add nuw nsw i32 %i.i42, %[[i24]] +; CHECK-NEXT: %dst.i.i43 = getelementptr inbounds double, double* %cache.A5, i32 %[[i25]] +; CHECK-NEXT: %[[i26:.+]] = mul nuw nsw i32 %j.i40, %N +; CHECK-NEXT: %[[i27:.+]] = add nuw nsw i32 %i.i42, %[[i26]] +; CHECK-NEXT: %dst.i1.i44 = getelementptr inbounds double, double* %K, i32 %[[i27]] ; CHECK-NEXT: %src.i.l.i45 = load double, double* %dst.i1.i44, align 8 ; CHECK-NEXT: store double %src.i.l.i45, double* %dst.i.i43, align 8 ; CHECK-NEXT: %i.next.i46 = add nuw nsw i32 %i.i42, 1 -; CHECK-NEXT: %28 = icmp eq i32 %i.next.i46, %N -; CHECK-NEXT: br i1 %28, label %init.end.i49, label %for.body.i47 +; CHECK-NEXT: %[[i28:.+]] = icmp eq i32 %i.next.i46, %N +; CHECK-NEXT: br i1 %[[i28]], label %init.end.i49, label %for.body.i47 ; CHECK: init.end.i49: ; preds = %for.body.i47 ; CHECK-NEXT: %j.next.i48 = add nuw nsw i32 %j.i40, 1 -; CHECK-NEXT: %29 = icmp eq i32 %j.next.i48, %N -; CHECK-NEXT: br i1 %29, label %__enzyme_memcpy_double_mat_32.exit50, label %init.idx.i41 +; CHECK-NEXT: %[[i29:.+]] = icmp eq i32 %j.next.i48, %N +; CHECK-NEXT: br i1 %[[i29]], label %__enzyme_memcpy_double_mat_32.exit50, label %init.idx.i41 ; CHECK: __enzyme_memcpy_double_mat_32.exit50: ; preds = %__enzyme_memcpy_double_mat_32.exit38, %init.end.i49 -; CHECK-NEXT: %30 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: %mallocsize6 = mul nuw nsw i32 %30, 8 +; CHECK-NEXT: %[[i30:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: %mallocsize6 = mul nuw nsw i32 %[[i30]], 8 ; CHECK-NEXT: %malloccall7 = tail call noalias nonnull i8* @malloc(i32 %mallocsize6) ; CHECK-NEXT: %cache.x8 = bitcast i8* %malloccall7 to double* -; CHECK-NEXT: call void @cblas_dcopy(i32 %30, double* %x0, i32 1, double* %cache.x8, i32 1) -; CHECK-NEXT: %31 = insertvalue { double*, double* } undef, double* %cache.A5, 0 -; CHECK-NEXT: %32 = insertvalue { double*, double* } %31, double* %cache.x8, 1 +; 
CHECK-NEXT: call void @cblas_dcopy(i32 %[[i30]], double* %x0, i32 1, double* %cache.x8, i32 1) ; CHECK-NEXT: tail call void @cblas_dgemv(i32 noundef 101, i32 noundef 111, i32 noundef %N, i32 noundef %N, double noundef 1.000000e-03, double* noundef %K, i32 noundef %N, double* noundef %x0, i32 noundef 1, double noundef 1.000000e+00, double* noundef %v0, i32 noundef 1) -; CHECK-NEXT: %33 = mul i32 %N, %N -; CHECK-NEXT: %mallocsize = mul nuw nsw i32 %33, 8 +; CHECK-NEXT: %[[i33:.+]] = mul i32 %N, %N +; CHECK-NEXT: %mallocsize = mul nuw nsw i32 %[[i33]], 8 ; CHECK-NEXT: %malloccall = tail call noalias nonnull i8* @malloc(i32 %mallocsize) ; CHECK-NEXT: %cache.A = bitcast i8* %malloccall to double* ; CHECK: %mul.i51 = add nuw nsw i32 %N, %N -; CHECK-NEXT: %34 = icmp eq i32 %mul.i51, 0 -; CHECK-NEXT: br i1 %34, label %__enzyme_memcpy_double_mat_32.exit62, label %init.idx.i53 +; CHECK-NEXT: %[[i34:.+]] = icmp eq i32 %mul.i51, 0 +; CHECK-NEXT: br i1 %[[i34]], label %__enzyme_memcpy_double_mat_32.exit62, label %init.idx.i53 ; CHECK: init.idx.i53: ; preds = %init.end.i61, %__enzyme_memcpy_double_mat_32.exit50 ; CHECK-NEXT: %j.i52 = phi i32 [ 0, %__enzyme_memcpy_double_mat_32.exit50 ], [ %j.next.i60, %init.end.i61 ] @@ -164,89 +158,79 @@ entry: ; CHECK: for.body.i59: ; preds = %for.body.i59, %init.idx.i53 ; CHECK-NEXT: %i.i54 = phi i32 [ 0, %init.idx.i53 ], [ %i.next.i58, %for.body.i59 ] -; CHECK-NEXT: %35 = mul nuw nsw i32 %j.i52, %N -; CHECK-NEXT: %36 = add nuw nsw i32 %i.i54, %35 -; CHECK-NEXT: %dst.i.i55 = getelementptr inbounds double, double* %cache.A, i32 %36 -; CHECK-NEXT: %37 = mul nuw nsw i32 %j.i52, %N -; CHECK-NEXT: %38 = add nuw nsw i32 %i.i54, %37 -; CHECK-NEXT: %dst.i1.i56 = getelementptr inbounds double, double* %K, i32 %38 +; CHECK-NEXT: %[[i35:.+]] = mul nuw nsw i32 %j.i52, %N +; CHECK-NEXT: %[[i36:.+]] = add nuw nsw i32 %i.i54, %[[i35]] +; CHECK-NEXT: %dst.i.i55 = getelementptr inbounds double, double* %cache.A, i32 %[[i36]] +; CHECK-NEXT: %[[i37:.+]] = mul 
nuw nsw i32 %j.i52, %N +; CHECK-NEXT: %[[i38:.+]] = add nuw nsw i32 %i.i54, %[[i37]] +; CHECK-NEXT: %dst.i1.i56 = getelementptr inbounds double, double* %K, i32 %[[i38]] ; CHECK-NEXT: %src.i.l.i57 = load double, double* %dst.i1.i56, align 8 ; CHECK-NEXT: store double %src.i.l.i57, double* %dst.i.i55, align 8 ; CHECK-NEXT: %i.next.i58 = add nuw nsw i32 %i.i54, 1 -; CHECK-NEXT: %39 = icmp eq i32 %i.next.i58, %N -; CHECK-NEXT: br i1 %39, label %init.end.i61, label %for.body.i59 +; CHECK-NEXT: %[[i39:.+]] = icmp eq i32 %i.next.i58, %N +; CHECK-NEXT: br i1 %[[i39]], label %init.end.i61, label %for.body.i59 ; CHECK: init.end.i61: ; preds = %for.body.i59 ; CHECK-NEXT: %j.next.i60 = add nuw nsw i32 %j.i52, 1 -; CHECK-NEXT: %40 = icmp eq i32 %j.next.i60, %N -; CHECK-NEXT: br i1 %40, label %__enzyme_memcpy_double_mat_32.exit62, label %init.idx.i53 +; CHECK-NEXT: %[[i40:.+]] = icmp eq i32 %j.next.i60, %N +; CHECK-NEXT: br i1 %[[i40]], label %__enzyme_memcpy_double_mat_32.exit62, label %init.idx.i53 ; CHECK: __enzyme_memcpy_double_mat_32.exit62: ; preds = %__enzyme_memcpy_double_mat_32.exit50, %init.end.i61 -; CHECK-NEXT: %41 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: %mallocsize1 = mul nuw nsw i32 %41, 8 +; CHECK-NEXT: %[[i41:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: %mallocsize1 = mul nuw nsw i32 %[[i41]], 8 ; CHECK-NEXT: %malloccall2 = tail call noalias nonnull i8* @malloc(i32 %mallocsize1) ; CHECK-NEXT: %cache.x = bitcast i8* %malloccall2 to double* -; CHECK-NEXT: call void @cblas_dcopy(i32 %41, double* %x0, i32 1, double* %cache.x, i32 1) -; CHECK-NEXT: %42 = insertvalue { double*, double* } undef, double* %cache.A, 0 -; CHECK-NEXT: %43 = insertvalue { double*, double* } %42, double* %cache.x, 1 +; CHECK-NEXT: call void @cblas_dcopy(i32 %[[i41]], double* %x0, i32 1, double* %cache.x, i32 1) ; CHECK-NEXT: tail call void @cblas_dgemv(i32 noundef 101, i32 noundef 111, i32 noundef %N, i32 noundef %N, double noundef 1.000000e-03, double* noundef %K, i32 
noundef %N, double* noundef %x0, i32 noundef 1, double noundef 1.000000e+00, double* noundef %v0, i32 noundef 1) ; CHECK-NEXT: tail call void @cblas_dgemv(i32 noundef 101, i32 noundef 111, i32 noundef %N, i32 noundef %N, double noundef 1.000000e-03, double* noundef %K, i32 noundef %N, double* noundef %x0, i32 noundef 1, double noundef 1.000000e+00, double* noundef %v0, i32 noundef 1) ; CHECK-NEXT: br label %invertentry ; CHECK: invertentry: ; preds = %__enzyme_memcpy_double_mat_32.exit62 ; CHECK-NEXT: store double %differeturn, double* %"'de", align 8 -; CHECK-NEXT: %44 = load double, double* %"'de", align 8 +; CHECK-NEXT: %[[i44:.+]] = load double, double* %"'de", align 8 ; CHECK-NEXT: store double 0.000000e+00, double* %"'de", align 8 -; CHECK-NEXT: %45 = load double, double* %"x0'", align 8 -; CHECK-NEXT: %46 = fadd fast double %45, %44 -; CHECK-NEXT: store double %46, double* %"x0'", align 8 +; CHECK-NEXT: %[[i45:.+]] = load double, double* %"x0'", align 8 +; CHECK-NEXT: %[[i46:.+]] = fadd fast double %[[i45]], %[[i44]] +; CHECK-NEXT: store double %[[i46]], double* %"x0'", align 8 ; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %x0, i32 1, double* %"K'", i32 %N) ; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32 %N, double 1.000000e-03, double* %K, i32 %N, double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) -; CHECK-NEXT: %47 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: call void @cblas_dscal(i32 %47, double 1.000000e+00, double* %"v0'", i32 1) -; CHECK-NEXT: %tape.ext.A = extractvalue { double*, double* } %43, 0 -; CHECK-NEXT: %tape.ext.x = extractvalue { double*, double* } %43, 1 -; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %tape.ext.x, i32 1, double* %"K'", i32 %N) -; CHECK-NEXT: %48 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32
%N, double 1.000000e-03, double* %tape.ext.A, i32 %48, double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) -; CHECK-NEXT: %49 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: call void @cblas_dscal(i32 %49, double 1.000000e+00, double* %"v0'", i32 1) -; CHECK-NEXT: %50 = bitcast double* %tape.ext.A to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %50) -; CHECK-NEXT: %51 = bitcast double* %tape.ext.x to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %51) -; CHECK-NEXT: %tape.ext.A9 = extractvalue { double*, double* } %32, 0 -; CHECK-NEXT: %tape.ext.x10 = extractvalue { double*, double* } %32, 1 -; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %tape.ext.x10, i32 1, double* %"K'", i32 %N) -; CHECK-NEXT: %52 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32 %N, double 1.000000e-03, double* %tape.ext.A9, i32 %52, double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) -; CHECK-NEXT: %53 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: call void @cblas_dscal(i32 %53, double 1.000000e+00, double* %"v0'", i32 1) -; CHECK-NEXT: %54 = bitcast double* %tape.ext.A9 to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %54) -; CHECK-NEXT: %55 = bitcast double* %tape.ext.x10 to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %55) -; CHECK-NEXT: %tape.ext.A17 = extractvalue { double*, double* } %21, 0 -; CHECK-NEXT: %tape.ext.x18 = extractvalue { double*, double* } %21, 1 -; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %tape.ext.x18, i32 1, double* %"K'", i32 %N) -; CHECK-NEXT: %56 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32 %N, double 1.000000e-03, double* %tape.ext.A17, i32 %56, double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) -; CHECK-NEXT: %57 = select i1 false, i32 %N, i32 %N -; 
CHECK-NEXT: call void @cblas_dscal(i32 %57, double 1.000000e+00, double* %"v0'", i32 1) -; CHECK-NEXT: %58 = bitcast double* %tape.ext.A17 to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %58) -; CHECK-NEXT: %59 = bitcast double* %tape.ext.x18 to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %59) -; CHECK-NEXT: %tape.ext.A25 = extractvalue { double*, double* } %10, 0 -; CHECK-NEXT: %tape.ext.x26 = extractvalue { double*, double* } %10, 1 -; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %tape.ext.x26, i32 1, double* %"K'", i32 %N) -; CHECK-NEXT: %60 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32 %N, double 1.000000e-03, double* %tape.ext.A25, i32 %60, double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) -; CHECK-NEXT: %61 = select i1 false, i32 %N, i32 %N -; CHECK-NEXT: call void @cblas_dscal(i32 %61, double 1.000000e+00, double* %"v0'", i32 1) -; CHECK-NEXT: %62 = bitcast double* %tape.ext.A25 to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %62) -; CHECK-NEXT: %63 = bitcast double* %tape.ext.x26 to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %63) +; CHECK-NEXT: %[[i47:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dscal(i32 %[[i47]], double 1.000000e+00, double* %"v0'", i32 1) +; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %cache.x, i32 1, double* %"K'", i32 %N) +; CHECK-NEXT: %[[i48:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32 %N, double 1.000000e-03, double* %cache.A, i32 %[[i48]], double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) +; CHECK-NEXT: %[[i49:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dscal(i32 %[[i49]], double 1.000000e+00, double* %"v0'", i32 1) +; CHECK-NEXT: %[[i50:.+]] = bitcast double* %cache.A 
to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i50]]) +; CHECK-NEXT: %[[i51:.+]] = bitcast double* %cache.x to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i51]]) +; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %cache.x8, i32 1, double* %"K'", i32 %N) +; CHECK-NEXT: %[[i52:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32 %N, double 1.000000e-03, double* %cache.A5, i32 %[[i52]], double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) +; CHECK-NEXT: %[[i53:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dscal(i32 %[[i53]], double 1.000000e+00, double* %"v0'", i32 1) +; CHECK-NEXT: %[[i54:.+]] = bitcast double* %cache.A5 to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i54]]) +; CHECK-NEXT: %[[i55:.+]] = bitcast double* %cache.x8 to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i55]]) +; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %cache.x16, i32 1, double* %"K'", i32 %N) +; CHECK-NEXT: %[[i56:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32 %N, double 1.000000e-03, double* %cache.A13, i32 %[[i56]], double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) +; CHECK-NEXT: %[[i57:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dscal(i32 %[[i57]], double 1.000000e+00, double* %"v0'", i32 1) +; CHECK-NEXT: %[[i58:.+]] = bitcast double* %cache.A13 to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i58]]) +; CHECK-NEXT: %[[i59:.+]] = bitcast double* %cache.x16 to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i59]]) +; CHECK-NEXT: call void @cblas_dger(i32 101, i32 %N, i32 %N, double 1.000000e-03, double* %"v0'", i32 1, double* %cache.x24, i32 1, double* %"K'", i32 %N) +; CHECK-NEXT: %[[i60:.+]] = select i1 
false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dgemv(i32 101, i32 112, i32 %N, i32 %N, double 1.000000e-03, double* %cache.A21, i32 %[[i60]], double* %"v0'", i32 1, double 1.000000e+00, double* %"x0'", i32 1) +; CHECK-NEXT: %[[i61:.+]] = select i1 false, i32 %N, i32 %N +; CHECK-NEXT: call void @cblas_dscal(i32 %[[i61]], double 1.000000e+00, double* %"v0'", i32 1) +; CHECK-NEXT: %[[i62:.+]] = bitcast double* %cache.A21 to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i62]]) +; CHECK-NEXT: %[[i63:.+]] = bitcast double* %cache.x24 to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i63]]) ; CHECK-NEXT: ret void ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseMode/blas/spmv_f_c_lacpy.ll b/enzyme/test/Enzyme/ReverseMode/blas/spmv_f_c_lacpy.ll index 7d2846b60fbf..95d4359ce46f 100644 --- a/enzyme/test/Enzyme/ReverseMode/blas/spmv_f_c_lacpy.ll +++ b/enzyme/test/Enzyme/ReverseMode/blas/spmv_f_c_lacpy.ll @@ -76,17 +76,15 @@ entry: ; CHECK-NEXT: %cache.y = bitcast i8* %malloccall2 to double* ; CHECK-NEXT: store i64 1, i64* %byref. ; CHECK-NEXT: call void @dcopy_64_(i8* %n_p, i8* %Y, i8* %incy_p, double* %cache.y, i64* %byref.) 
-; CHECK-NEXT: %4 = insertvalue { double*, double* } undef, double* %cache.ap, 0 -; CHECK-NEXT: %5 = insertvalue { double*, double* } %4, double* %cache.y, 1 -; CHECK-NEXT: %6 = bitcast i8* %n_p to i64* -; CHECK-NEXT: %7 = load i64, i64* %6 -; CHECK-NEXT: %8 = add i64 %7, 1 -; CHECK-NEXT: %square_mat_size_y0 = mul i64 %7, %8 +; CHECK-NEXT: %[[i6:.+]] = bitcast i8* %n_p to i64* +; CHECK-NEXT: %[[i7:.+]] = load i64, i64* %[[i6]] +; CHECK-NEXT: %[[i8:.+]] = add i64 %[[i7]], 1 +; CHECK-NEXT: %square_mat_size_y0 = mul i64 %[[i7]], %[[i8]] ; CHECK-NEXT: %size_y0 = udiv i64 %square_mat_size_y0, 2 ; CHECK-NEXT: %mallocsize4 = mul nuw nsw i64 %size_y0, 8 ; CHECK-NEXT: %malloccall5 = tail call noalias nonnull i8* @malloc(i64 %mallocsize4) ; CHECK-NEXT: %mat_y0 = bitcast i8* %malloccall5 to double* -; CHECK-NEXT: %9 = bitcast double* %mat_y0 to i8* +; CHECK-NEXT: %[[i9:.+]] = bitcast double* %mat_y0 to i8* ; CHECK-NEXT: call void @dspmv_64_(i8* %uplo, i8* %n_p, i8* %alpha, i8* %AP, i8* %X, i8* %incx_p, i8* %beta, i8* %Y, i8* %incy_p) ; CHECK-NEXT: %"ptr'ipc" = bitcast i8* %"AP'" to double* ; CHECK-NEXT: %ptr = bitcast i8* %AP to double* @@ -95,12 +93,9 @@ entry: ; CHECK: invertentry: ; preds = %entry ; CHECK-NEXT: store double 0.000000e+00, double* %"ptr'ipc", align 8, !alias.scope !3, !noalias !0 -; CHECK-NEXT: %tape.ext.ap = extractvalue { double*, double* } %5, 0 -; CHECK-NEXT: %10 = bitcast double* %tape.ext.ap to i8* -; CHECK-NEXT: %tape.ext.y = extractvalue { double*, double* } %5, 1 -; CHECK-NEXT: %11 = bitcast double* %tape.ext.y to i8* -; CHECK-NEXT: %tape.ext.y3 = extractvalue { double*, double* } %5, 1 -; CHECK-NEXT: %12 = bitcast double* %tape.ext.y3 to i8* +; CHECK-NEXT: %[[i10:.+]] = bitcast double* %cache.ap to i8* +; CHECK-NEXT: %[[i11:.+]] = bitcast double* %cache.y to i8* +; CHECK-NEXT: %[[i12:.+]] = bitcast double* %cache.y to i8* ; CHECK-NEXT: store i64 1, i64* %byref.int.one ; CHECK-NEXT: %intcast.int.one = bitcast i64* %byref.int.one to i8* ; CHECK-NEXT: 
store double 1.000000e+00, double* %byref.constant.fp.1.0 @@ -109,91 +104,91 @@ entry: ; CHECK-NEXT: %fpcast.constant.fp.0.0 = bitcast double* %byref.constant.fp.0.0 to i8* ; CHECK-NEXT: store i64 1, i64* %byref.constant.int.1 ; CHECK-NEXT: %intcast.constant.int.1 = bitcast i64* %byref.constant.int.1 to i8* -; CHECK-NEXT: call void @dspmv_64_(i8* %uplo, i8* %n_p, i8* %fpcast.constant.fp.1.0, i8* %10, i8* %X, i8* %incx_p, i8* %fpcast.constant.fp.0.0, i8* %9, i8* %intcast.constant.int.1) +; CHECK-NEXT: call void @dspmv_64_(i8* %uplo, i8* %n_p, i8* %fpcast.constant.fp.1.0, i8* %[[i10]], i8* %X, i8* %incx_p, i8* %fpcast.constant.fp.0.0, i8* %[[i9]], i8* %intcast.constant.int.1) ; CHECK-NEXT: store i64 1, i64* %byref.constant.int.16 ; CHECK-NEXT: %intcast.constant.int.17 = bitcast i64* %byref.constant.int.16 to i8* -; CHECK-NEXT: %13 = call fast double @ddot_64_(i8* %n_p, i8* %"Y'", i8* %incy_p, i8* %9, i8* %intcast.constant.int.17) -; CHECK-NEXT: %14 = bitcast i8* %"alpha'" to double* -; CHECK-NEXT: %15 = load double, double* %14 -; CHECK-NEXT: %16 = fadd fast double %15, %13 -; CHECK-NEXT: store double %16, double* %14 +; CHECK-NEXT: %[[i13:.+]] = call fast double @ddot_64_(i8* %n_p, i8* %"Y'", i8* %incy_p, i8* %[[i9]], i8* %intcast.constant.int.17) +; CHECK-NEXT: %[[i14:.+]] = bitcast i8* %"alpha'" to double* +; CHECK-NEXT: %[[i15:.+]] = load double, double* %[[i14]] +; CHECK-NEXT: %[[i16:.+]] = fadd fast double %[[i15]], %[[i13]] +; CHECK-NEXT: store double %[[i16]], double* %[[i14]] ; CHECK-NEXT: call void @dspr2_64_(i8* %uplo, i8* %n_p, i8* %alpha, i8* %X, i8* %incx_p, i8* %"Y'", i8* %incy_p, i8* %"AP'") -; CHECK: %17 = load i64, i64* %n -; CHECK-NEXT: %18 = load i64, i64* %incx -; CHECK-NEXT: %19 = bitcast i8* %"Y'" to i64* -; CHECK-NEXT: %20 = load i64, i64* %19 -; CHECK-NEXT: %21 = bitcast i8* %alpha to double* -; CHECK-NEXT: %22 = load double, double* %21 +; CHECK: %[[i17:.+]] = load i64, i64* %n +; CHECK-NEXT: %[[i18:.+]] = load i64, i64* %incx +; CHECK-NEXT: 
%[[i19:.+]] = bitcast i8* %"Y'" to i64* +; CHECK-NEXT: %[[i20:.+]] = load i64, i64* %[[i19]] +; CHECK-NEXT: %[[i21:.+]] = bitcast i8* %alpha to double* +; CHECK-NEXT: %[[i22:.+]] = load double, double* %[[i21]] ; CHECK-NEXT: %loaded.trans.i = load i8, i8* %uplo ; CHECK-DAG: %[[i0:.+]] = icmp eq i8 %loaded.trans.i, 85 ; CHECK-DAG: %[[i1:.+]] = icmp eq i8 %loaded.trans.i, 117 -; CHECK-NEXT: %25 = or i1 %[[i1]], %[[i0]] -; CHECK-NEXT: %k.i = select i1 %25, i64 0, i64 1 -; CHECK-NEXT: %26 = icmp eq i64 %17, 0 -; CHECK-NEXT: br i1 %26, label %__enzyme_spmv_diagd_64_.exit, label %init.i +; CHECK-NEXT: %[[i25:.+]] = or i1 %[[i1]], %[[i0]] +; CHECK-NEXT: %k.i = select i1 %[[i25]], i64 0, i64 1 +; CHECK-NEXT: %[[i26:.+]] = icmp eq i64 %[[i17]], 0 +; CHECK-NEXT: br i1 %[[i26]], label %__enzyme_spmv_diagd_64_.exit, label %init.i ; CHECK: init.i: ; preds = %invertentry -; CHECK-NEXT: %27 = bitcast i8* %X to double* -; CHECK-NEXT: %28 = bitcast i8* %incx_p to double* -; CHECK-NEXT: %29 = bitcast i8* %incy_p to double* -; CHECK-NEXT: br i1 %25, label %uper.i, label %lower.i +; CHECK-NEXT: %[[i27:.+]] = bitcast i8* %X to double* +; CHECK-NEXT: %[[i28:.+]] = bitcast i8* %incx_p to double* +; CHECK-NEXT: %[[i29:.+]] = bitcast i8* %incy_p to double* +; CHECK-NEXT: br i1 %[[i25]], label %uper.i, label %lower.i ; CHECK: uper.i: ; preds = %uper.i, %init.i ; CHECK-NEXT: %iteration.i = phi i64 [ 0, %init.i ], [ %iter.next.i, %uper.i ] ; CHECK-NEXT: %k1.i = phi i64 [ 0, %init.i ], [ %k.next.i, %uper.i ] ; CHECK-NEXT: %iter.next.i = add i64 %iteration.i, 1 ; CHECK-NEXT: %k.next.i = add i64 %k1.i, %iter.next.i -; CHECK-NEXT: %x.idx.i = mul nuw i64 %iteration.i, %18 -; CHECK-NEXT: %y.idx.i = mul nuw i64 %iteration.i, %20 -; CHECK-NEXT: %x.ptr.i = getelementptr inbounds double, double* %27, i64 %x.idx.i -; CHECK-NEXT: %y.ptr.i = getelementptr inbounds double, double* %28, i64 %y.idx.i +; CHECK-NEXT: %x.idx.i = mul nuw i64 %iteration.i, %[[i18]] +; CHECK-NEXT: %y.idx.i = mul nuw i64 
%iteration.i, %[[i20]] +; CHECK-NEXT: %x.ptr.i = getelementptr inbounds double, double* %[[i27]], i64 %x.idx.i +; CHECK-NEXT: %y.ptr.i = getelementptr inbounds double, double* %[[i28]], i64 %y.idx.i ; CHECK-NEXT: %x.val.i = load double, double* %x.ptr.i ; CHECK-NEXT: %y.val.i = load double, double* %y.ptr.i ; CHECK-NEXT: %xy.i = fmul fast double %x.val.i, %y.val.i -; CHECK-NEXT: %xy.alpha.i = fmul fast double %xy.i, %22 -; CHECK-NEXT: %k.ptr.i = getelementptr inbounds double, double* %29, i64 %k1.i +; CHECK-NEXT: %xy.alpha.i = fmul fast double %xy.i, %[[i22]] +; CHECK-NEXT: %k.ptr.i = getelementptr inbounds double, double* %[[i29]], i64 %k1.i ; CHECK-NEXT: %k.val.i = load double, double* %k.ptr.i ; CHECK-NEXT: %k.val.new.i = fsub fast double %k.val.i, %xy.alpha.i ; CHECK-NEXT: store double %k.val.new.i, double* %k.ptr.i -; CHECK-NEXT: %30 = icmp eq i64 %iter.next.i, %17 -; CHECK-NEXT: br i1 %30, label %__enzyme_spmv_diagd_64_.exit, label %uper.i +; CHECK-NEXT: %[[i30:.+]] = icmp eq i64 %iter.next.i, %[[i17]] +; CHECK-NEXT: br i1 %[[i30]], label %__enzyme_spmv_diagd_64_.exit, label %uper.i ; CHECK: lower.i: ; preds = %lower.i, %init.i ; CHECK-NEXT: %iteration2.i = phi i64 [ 0, %init.i ], [ %iter.next4.i, %lower.i ] ; CHECK-NEXT: %k3.i = phi i64 [ 0, %init.i ], [ %k.next5.i, %lower.i ] ; CHECK-NEXT: %iter.next4.i = add i64 %iteration2.i, 1 -; CHECK-NEXT: %tmp.val.i = add i64 %17, 1 +; CHECK-NEXT: %tmp.val.i = add i64 %[[i17]], 1 ; CHECK-NEXT: %tmp.val.other.i = sub i64 %tmp.val.i, %iter.next4.i ; CHECK-NEXT: %k.next5.i = add i64 %k3.i, %tmp.val.other.i -; CHECK-NEXT: %x.idx6.i = mul nuw i64 %iteration2.i, %18 -; CHECK-NEXT: %y.idx7.i = mul nuw i64 %iteration2.i, %20 -; CHECK-NEXT: %x.ptr8.i = getelementptr inbounds double, double* %27, i64 %x.idx6.i -; CHECK-NEXT: %y.ptr9.i = getelementptr inbounds double, double* %28, i64 %y.idx7.i +; CHECK-NEXT: %x.idx6.i = mul nuw i64 %iteration2.i, %[[i18]] +; CHECK-NEXT: %y.idx7.i = mul nuw i64 %iteration2.i, %[[i20]] +; 
CHECK-NEXT: %x.ptr8.i = getelementptr inbounds double, double* %[[i27]], i64 %x.idx6.i +; CHECK-NEXT: %y.ptr9.i = getelementptr inbounds double, double* %[[i28]], i64 %y.idx7.i ; CHECK-NEXT: %x.val10.i = load double, double* %x.ptr8.i ; CHECK-NEXT: %y.val11.i = load double, double* %y.ptr9.i ; CHECK-NEXT: %xy12.i = fmul fast double %x.val10.i, %y.val11.i -; CHECK-NEXT: %xy.alpha13.i = fmul fast double %xy12.i, %22 -; CHECK-NEXT: %k.ptr14.i = getelementptr inbounds double, double* %29, i64 %k3.i +; CHECK-NEXT: %xy.alpha13.i = fmul fast double %xy12.i, %[[i22]] +; CHECK-NEXT: %k.ptr14.i = getelementptr inbounds double, double* %[[i29]], i64 %k3.i ; CHECK-NEXT: %k.val15.i = load double, double* %k.ptr14.i ; CHECK-NEXT: %k.val.new16.i = fsub fast double %k.val15.i, %xy.alpha13.i ; CHECK-NEXT: store double %k.val.new16.i, double* %k.ptr14.i -; CHECK-NEXT: %31 = icmp eq i64 %iter.next4.i, %17 -; CHECK-NEXT: br i1 %31, label %__enzyme_spmv_diagd_64_.exit, label %lower.i +; CHECK-NEXT: %[[i31:.+]] = icmp eq i64 %iter.next4.i, %[[i17]] +; CHECK-NEXT: br i1 %[[i31]], label %__enzyme_spmv_diagd_64_.exit, label %lower.i ; CHECK: __enzyme_spmv_diagd_64_.exit: ; preds = %invertentry, %uper.i, %lower.i ; CHECK-NEXT: store double 1.000000e+00, double* %byref.constant.fp.1.08 ; CHECK-NEXT: %fpcast.constant.fp.1.09 = bitcast double* %byref.constant.fp.1.08 to i8* -; CHECK-NEXT: call void @dspmv_64_(i8* %uplo, i8* %n_p, i8* %alpha, i8* %10, i8* %"Y'", i8* %incy_p, i8* %fpcast.constant.fp.1.09, i8* %"X'", i8* %incx_p) -; CHECK-NEXT: %32 = call fast double @ddot_64_(i8* %n_p, i8* %"Y'", i8* %incy_p, i8* %11, i8* %intcast.int.one) -; CHECK-NEXT: %33 = bitcast i8* %"beta'" to double* -; CHECK-NEXT: %34 = load double, double* %33 -; CHECK-NEXT: %35 = fadd fast double %34, %32 -; CHECK-NEXT: store double %35, double* %33 +; CHECK-NEXT: call void @dspmv_64_(i8* %uplo, i8* %n_p, i8* %alpha, i8* %[[i10]], i8* %"Y'", i8* %incy_p, i8* %fpcast.constant.fp.1.09, i8* %"X'", i8* %incx_p) +; 
CHECK-NEXT: %[[i32:.+]] = call fast double @ddot_64_(i8* %n_p, i8* %"Y'", i8* %incy_p, i8* %[[i11]], i8* %intcast.int.one) +; CHECK-NEXT: %[[i33:.+]] = bitcast i8* %"beta'" to double* +; CHECK-NEXT: %[[i34:.+]] = load double, double* %[[i33]] +; CHECK-NEXT: %[[i35:.+]] = fadd fast double %[[i34]], %[[i32]] +; CHECK-NEXT: store double %[[i35]], double* %[[i33]] ; CHECK-NEXT: call void @dscal_64_(i8* %n_p, i8* %beta, i8* %"Y'", i8* %incy_p) -; CHECK-NEXT: %36 = bitcast double* %tape.ext.ap to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %36) -; CHECK-NEXT: %37 = bitcast double* %tape.ext.y3 to i8* -; CHECK-NEXT: tail call void @free(i8* nonnull %37) +; CHECK-NEXT: %[[i36:.+]] = bitcast double* %cache.ap to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i36]]) +; CHECK-NEXT: %[[i37:.+]] = bitcast double* %cache.y to i8* +; CHECK-NEXT: tail call void @free(i8* nonnull %[[i37]]) ; CHECK-NEXT: ret void ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseMode/insertuw.ll b/enzyme/test/Enzyme/ReverseMode/insertuw.ll index 9e9a8d9f5ca3..50796276b7e8 100644 --- a/enzyme/test/Enzyme/ReverseMode/insertuw.ll +++ b/enzyme/test/Enzyme/ReverseMode/insertuw.ll @@ -54,10 +54,7 @@ declare void @__enzyme_autodiff(...) ; CHECK-NEXT: %pre_x1 = load double, double* %in1 ; CHECK-NEXT: store double 0.000000e+00, double* %in1 ; CHECK-NEXT: %x1 = insertvalue { double, double, double* } %x0, double %pre_x1, 1 -; CHECK-NEXT: %out1 = insertvalue { double, double, double* } %x1, double* %in0, 2 -; CHECK-NEXT: %post_x0 = extractvalue { double, double, double* } %out1, 0 -; CHECK-NEXT: %post_x1 = extractvalue { double, double, double* } %x1, 1 -; CHECK-NEXT: %mul0 = fmul double %post_x0, %post_x1 +; CHECK-NEXT: %mul0 = fmul double %pre_x0, %pre_x1 ; CHECK-NEXT: store double %mul0, double* %in0 ; CHECK-NEXT: br label %exit @@ -88,7 +85,6 @@ declare void @__enzyme_autodiff(...) 
; CHECK-NEXT: %[[i9:.+]] = fadd fast double %[[i8]], %[[i2]] ; CHECK-NEXT: store double %[[i9]], double* %[[i7]] ; CHECK-NEXT: %[[i10:.+]] = load { double, double, double* }, { double, double, double* }* %"out1'de" -; CHECK-NEXT: %[[i11:.+]] = insertvalue { double, double, double* } %[[i10]], double* null, 2 ; CHECK-NEXT: %[[i12:.+]] = load { double, double, double* }, { double, double, double* }* %"x1'de" ; CHECK-NEXT: %[[i13:.+]] = extractvalue { double, double, double* } %[[i10]], 0 ; CHECK-NEXT: %[[i14:.+]] = getelementptr inbounds { double, double, double* }, { double, double, double* }* %"x1'de", i32 0, i32 0 @@ -105,7 +101,6 @@ declare void @__enzyme_autodiff(...) ; CHECK-NEXT: %[[i22:.+]] = extractvalue { double, double, double* } %[[i21]], 1 ; CHECK-NEXT: %[[i23:.+]] = fadd fast double 0.000000e+00, %[[i22]] ; CHECK-NEXT: %[[i24:.+]] = load { double, double, double* }, { double, double, double* }* %"x1'de" -; CHECK-NEXT: %[[i25:.+]] = insertvalue { double, double, double* } %[[i24]], double 0.000000e+00, 1 ; CHECK-NEXT: %[[i26:.+]] = load { double, double, double* }, { double, double, double* }* %"x0'de" ; CHECK-NEXT: %[[i27:.+]] = extractvalue { double, double, double* } %[[i24]], 0 ; CHECK-NEXT: %[[i28:.+]] = getelementptr inbounds { double, double, double* }, { double, double, double* }* %"x0'de", i32 0, i32 0 diff --git a/enzyme/test/Enzyme/ReverseMode/insertuw2.ll b/enzyme/test/Enzyme/ReverseMode/insertuw2.ll index e1003baa7220..43f278591769 100644 --- a/enzyme/test/Enzyme/ReverseMode/insertuw2.ll +++ b/enzyme/test/Enzyme/ReverseMode/insertuw2.ll @@ -57,11 +57,7 @@ declare void @__enzyme_autodiff(...) 
; CHECK-NEXT: %pre_x1 = load double, double* %in1 ; CHECK-NEXT: store double 0.000000e+00, double* %in1 ; CHECK-NEXT: %x1 = insertvalue { double, double, double* } %x0, double %pre_x1, 1 -; CHECK-NEXT: %out1 = insertvalue { double, double, double* } %x1, double* %in0, 2 -; CHECK-NEXT: %out2 = insertvalue { double, double, double* } %out1, double 0.000000e+00, 1 -; CHECK-NEXT: %post_x0 = extractvalue { double, double, double* } %out2, 0 -; CHECK-NEXT: %post_x1 = extractvalue { double, double, double* } %x1, 1 -; CHECK-NEXT: %mul0 = fmul double %post_x0, %post_x1 +; CHECK-NEXT: %mul0 = fmul double %pre_x0, %pre_x1 ; CHECK-NEXT: store double %mul0, double* %in0 ; CHECK-NEXT: br label %exit @@ -93,7 +89,6 @@ declare void @__enzyme_autodiff(...) ; CHECK-NEXT: %[[i9:.+]] = fadd fast double %[[i8]], %[[i2]] ; CHECK-NEXT: store double %[[i9]], double* %[[i7]] ; CHECK-NEXT: %[[i10:.+]] = load { double, double, double* }, { double, double, double* }* %"out2'de" -; CHECK-NEXT: %[[i11:.+]] = insertvalue { double, double, double* } %[[i10]], double 0.000000e+00, 1 ; CHECK-NEXT: %[[i12:.+]] = load { double, double, double* }, { double, double, double* }* %"out1'de" ; CHECK-NEXT: %[[i13:.+]] = extractvalue { double, double, double* } %[[i10]], 0 ; CHECK-NEXT: %[[i14:.+]] = getelementptr inbounds { double, double, double* }, { double, double, double* }* %"out1'de", i32 0, i32 0 @@ -106,7 +101,6 @@ declare void @__enzyme_autodiff(...) 
; CHECK-NEXT: store double %[[i19]], double* %[[i17]] ; CHECK-NEXT: store { double, double, double* } zeroinitializer, { double, double, double* }* %"out2'de" ; CHECK-NEXT: %[[i20:.+]] = load { double, double, double* }, { double, double, double* }* %"out1'de" -; CHECK-NEXT: %[[i21:.+]] = insertvalue { double, double, double* } %[[i20]], double* null, 2 ; CHECK-NEXT: %[[i22:.+]] = load { double, double, double* }, { double, double, double* }* %"x1'de" ; CHECK-NEXT: %[[i23:.+]] = extractvalue { double, double, double* } %[[i20]], 0 ; CHECK-NEXT: %[[i24:.+]] = getelementptr inbounds { double, double, double* }, { double, double, double* }* %"x1'de", i32 0, i32 0 @@ -123,7 +117,6 @@ declare void @__enzyme_autodiff(...) ; CHECK-NEXT: %[[i32:.+]] = extractvalue { double, double, double* } %[[i31]], 1 ; CHECK-NEXT: %[[i33:.+]] = fadd fast double 0.000000e+00, %[[i32]] ; CHECK-NEXT: %[[i34:.+]] = load { double, double, double* }, { double, double, double* }* %"x1'de" -; CHECK-NEXT: %[[i35:.+]] = insertvalue { double, double, double* } %[[i34]], double 0.000000e+00, 1 ; CHECK-NEXT: %[[i36:.+]] = load { double, double, double* }, { double, double, double* }* %"x0'de" ; CHECK-NEXT: %[[i37:.+]] = extractvalue { double, double, double* } %[[i34]], 0 ; CHECK-NEXT: %[[i38:.+]] = getelementptr inbounds { double, double, double* }, { double, double, double* }* %"x0'de", i32 0, i32 0 diff --git a/enzyme/test/Enzyme/ReverseMode/needsCacheWholeAllocation.ll b/enzyme/test/Enzyme/ReverseMode/needsCacheWholeAllocation.ll index 5170fe8b43db..3e4ad2f93bd0 100644 --- a/enzyme/test/Enzyme/ReverseMode/needsCacheWholeAllocation.ll +++ b/enzyme/test/Enzyme/ReverseMode/needsCacheWholeAllocation.ll @@ -67,12 +67,11 @@ bb: ; CHECK-NEXT: store i1 %i6, i1* %7 ; CHECK-NEXT: store i64 %i2, i64* %5 ; CHECK-NEXT: call void @augmented_a25(double* %a5, double* %"a5'", i64* nocapture readonly %5, i1 %i6) -; CHECK-NEXT: %8 = insertvalue { i8*, double*, double* } undef, double* %a5, 1 -; CHECK-NEXT: %9 = 
getelementptr inbounds { i8*, double*, double* }, { i8*, double*, double* }* %0, i32 0, i32 1 -; CHECK-NEXT: store double* %a5, double** %9 -; CHECK-NEXT: %10 = getelementptr inbounds { i8*, double*, double* }, { i8*, double*, double* }* %0, i32 0, i32 2 -; CHECK-NEXT: store double* %"a5'", double** %10 -; CHECK-NEXT: %11 = load { i8*, double*, double* }, { i8*, double*, double* }* %0 -; CHECK-NEXT: ret { i8*, double*, double* } %11 +; CHECK-NEXT: %[[i9:.+]] = getelementptr inbounds { i8*, double*, double* }, { i8*, double*, double* }* %0, i32 0, i32 1 +; CHECK-NEXT: store double* %a5, double** %[[i9]] +; CHECK-NEXT: %[[i10:.+]] = getelementptr inbounds { i8*, double*, double* }, { i8*, double*, double* }* %0, i32 0, i32 2 +; CHECK-NEXT: store double* %"a5'", double** %[[i10]] +; CHECK-NEXT: %[[i11:.+]] = load { i8*, double*, double* }, { i8*, double*, double* }* %0 +; CHECK-NEXT: ret { i8*, double*, double* } %[[i11]] ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseMode/unnecessaryalloc.ll b/enzyme/test/Enzyme/ReverseMode/unnecessaryalloc.ll index f913e750c44c..3083e56df4f4 100644 --- a/enzyme/test/Enzyme/ReverseMode/unnecessaryalloc.ll +++ b/enzyme/test/Enzyme/ReverseMode/unnecessaryalloc.ll @@ -59,7 +59,6 @@ declare void @free(i8*) ; CHECK-NEXT: %mallocsize = mul nuw nsw i64 %i1, 8 ; CHECK-NEXT: %malloccall = tail call noalias nonnull i8* @malloc(i64 %mallocsize) ; CHECK-NEXT: tail call void @free(i8* nonnull %malloccall) -; CHECK-NEXT: %0 = insertvalue { i8*, double } undef, double %x, 1 ; CHECK-NEXT: ret double %x ; CHECK-NEXT: } diff --git a/enzyme/test/Enzyme/ReverseModeVector/mul.ll b/enzyme/test/Enzyme/ReverseModeVector/mul.ll index f6c3c3ff1440..c116daa18b4a 100644 --- a/enzyme/test/Enzyme/ReverseModeVector/mul.ll +++ b/enzyme/test/Enzyme/ReverseModeVector/mul.ll @@ -28,7 +28,6 @@ entry: ; CHECK-NEXT: store [2 x double] zeroinitializer, [2 x double]* %"y'de" ; CHECK-NEXT: %0 = extractvalue [2 x double] %differeturn, 0 ; CHECK-NEXT: %[[m0diffex:.+]] 
= fmul fast double %0, %y -; CHECK-NEXT: %[[i1:.+]] = insertvalue [2 x double] undef, double %[[m0diffex]], 0 ; CHECK-NEXT: %[[i2:.+]] = extractvalue [2 x double] %differeturn, 1 ; CHECK-NEXT: %[[m0diffex1:.+]] = fmul fast double %[[i2]], %y ; CHECK-NEXT: %[[i6:.+]] = getelementptr inbounds [2 x double], [2 x double]* %"x'de", i32 0, i32 0 @@ -40,7 +39,6 @@ entry: ; CHECK-NEXT: %[[i11:.+]] = fadd fast double %[[i10]], %[[m0diffex1]] ; CHECK-NEXT: store double %[[i11]], double* %[[i9]] ; CHECK-NEXT: %[[m1diffey:.+]] = fmul fast double %0, %x -; CHECK-NEXT: %[[i4:.+]] = insertvalue [2 x double] undef, double %[[m1diffey]], 0 ; CHECK-NEXT: %[[m1diffey2:.+]] = fmul fast double %[[i2]], %x ; CHECK-NEXT: %[[i12:.+]] = getelementptr inbounds [2 x double], [2 x double]* %"y'de", i32 0, i32 0 ; CHECK-NEXT: %[[i13:.+]] = load double, double* %[[i12]] diff --git a/enzyme/test/Enzyme/ReverseModeVector/square.ll b/enzyme/test/Enzyme/ReverseModeVector/square.ll index 8064e1e7f46f..31d65c99f1b3 100644 --- a/enzyme/test/Enzyme/ReverseModeVector/square.ll +++ b/enzyme/test/Enzyme/ReverseModeVector/square.ll @@ -24,7 +24,6 @@ entry: ; CHECK-NEXT: store [2 x double] zeroinitializer, [2 x double]* %"x'de" ; CHECK-NEXT: %0 = extractvalue [2 x double] %differeturn, 0 ; CHECK-NEXT: %[[m0diffex:.+]] = fmul fast double %0, %x -; CHECK-NEXT: %[[i1:.+]] = insertvalue [2 x double] undef, double %[[m0diffex]], 0 ; CHECK-NEXT: %[[i2:.+]] = extractvalue [2 x double] %differeturn, 1 ; CHECK-NEXT: %[[m0diffex1:.+]] = fmul fast double %[[i2]], %x ; CHECK-NEXT: %[[i4:.+]] = getelementptr inbounds [2 x double], [2 x double]* %"x'de", i32 0, i32 0