diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 9fbb7b461364b1..7f3c5cf6cb4436 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -100,20 +100,34 @@ static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info,
 static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
                                     IntrinsicInst *II) {
   assert(II != nullptr && "Intrinsic cannot be null");
+  Intrinsic::ID IID = II->getIntrinsicID();
+  Type *RetTy = II->getType();
+  Type *ScalarRetTy = RetTy->getScalarType();
   // At the moment VFABI assumes the return type is always widened unless it is
   // a void type.
-  auto *VTy = dyn_cast<VectorType>(II->getType());
+  auto *VTy = dyn_cast<VectorType>(RetTy);
   ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));
+
+  // OloadTys collects types used in scalar intrinsic overload name.
+  SmallVector<Type *, 3> OloadTys;
+  if (!RetTy->isVoidTy() && isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
+    OloadTys.push_back(ScalarRetTy);
+
   // Compute the argument types of the corresponding scalar call and check that
   // all vector operands match the previously found EC.
   SmallVector<Type *, 8> ScalarArgTypes;
-  Intrinsic::ID IID = II->getIntrinsicID();
   for (auto Arg : enumerate(II->args())) {
     auto *ArgTy = Arg.value()->getType();
+    bool IsOloadTy = isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index());
     if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
       ScalarArgTypes.push_back(ArgTy);
+      if (IsOloadTy)
+        OloadTys.push_back(ArgTy);
     } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
-      ScalarArgTypes.push_back(VectorArgTy->getElementType());
+      auto *ScalarArgTy = VectorArgTy->getElementType();
+      ScalarArgTypes.push_back(ScalarArgTy);
+      if (IsOloadTy)
+        OloadTys.push_back(ScalarArgTy);
       // When return type is void, set EC to the first vector argument, and
       // disallow vector arguments with different ECs.
       if (EC.isZero())
@@ -129,7 +143,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
   // using scalar argument types.
   std::string ScalarName =
       Intrinsic::isOverloaded(IID)
-          ? Intrinsic::getName(IID, ScalarArgTypes, II->getModule())
+          ? Intrinsic::getName(IID, OloadTys, II->getModule())
           : Intrinsic::getName(IID).str();
 
   // Try to find the mapping for the scalar version of this intrinsic and the
@@ -146,7 +160,6 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
 
   // Replace the call to the intrinsic with a call to the vector library
   // function.
-  Type *ScalarRetTy = II->getType()->getScalarType();
   FunctionType *ScalarFTy =
       FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
   const std::string MangledName = VD->getVectorFunctionABIVariantString();
@@ -193,6 +206,8 @@ static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
   for (auto &I : instructions(F)) {
     // Process only intrinsic calls that return void or a vector.
     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+      if (II->getIntrinsicID() == Intrinsic::not_intrinsic)
+        continue;
       if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy())
         continue;
 
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
index f7e95008b71237..7b173bda561553 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
 declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
 ;.
-; CHECK: @llvm.compiler.used = appending global [60 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [64 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vpowq_f64, ptr @armpl_vpowq_f32, ptr @armpl_svpow_f64_x, ptr @armpl_svpow_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
 ;.
 define <2 x double> @llvm_cos_f64(<2 x double> %in) {
@@ -333,17 +333,10 @@ declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
 declare <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
 declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-;
-; There is a bug in the replace-with-veclib pass, and for intrinsics which take
-; more than one arguments, but has just one overloaded type, it incorrectly
-; reconstructs the scalar name, for pow specifically it is searching for:
-; llvm.pow.f64.f64 and llvm.pow.f32.f32
-;
-
 define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
 ; CHECK-LABEL: define <2 x double> @llvm_pow_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]], <2 x double> [[POWER:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN]], <2 x double> [[POWER]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vpowq_f64(<2 x double> [[IN]], <2 x double> [[POWER]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %power)
@@ -353,7 +346,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
 define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
 ; CHECK-LABEL: define <4 x float> @llvm_pow_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]], <4 x float> [[POWER:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN]], <4 x float> [[POWER]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vpowq_f32(<4 x float> [[IN]], <4 x float> [[POWER]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %power)
@@ -363,7 +356,7 @@
 define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power) #0 {
 ; CHECK-LABEL: define <vscale x 2 x double> @llvm_pow_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POWER:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power)
@@ -373,7 +366,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power) #0 {
 define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @llvm_pow_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POWER:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power)
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll
index f3f27344ad80e3..155587026b464b 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll
@@ -4,7 +4,7 @@ target triple = "aarch64-unknown-linux-gnu"
 ;.
-; CHECK: @llvm.compiler.used = appending global [30 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
 ;.
 define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_ceil_vscale_f64(
@@ -278,7 +278,7 @@ define <vscale x 4 x float> @llvm_nearbyint_vscale_f32(<vscale x 4 x float> %in) {
 define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow) {
 ; CHECK-LABEL: @llvm_pow_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow)
@@ -287,7 +287,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow) {
 define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow) {
 ; CHECK-LABEL: @llvm_pow_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow)
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
index 59c2f94e167633..b89bf3d6f2ca97 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
@@ -4,7 +4,7 @@ target triple = "aarch64-unknown-linux-gnu"
 ;.
-; CHECK: @llvm.compiler.used = appending global [30 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
 ;.
 define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_ceil_f64(
@@ -278,7 +278,7 @@ define <4 x float> @llvm_nearbyint_f32(<4 x float> %in) {
 define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
 ; CHECK-LABEL: @llvm_pow_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %pow)
@@ -287,7 +287,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
 define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %pow) {
 ; CHECK-LABEL: @llvm_pow_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %pow)
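
Note (reviewer illustration, not part of the patch): the heart of the fix is which types feed Intrinsic::getName when rebuilding the scalar name. llvm.pow is overloaded on a single type, so the veclib mappings are keyed on "llvm.pow.f64" / "llvm.pow.f32"; building the name from every scalarized argument type yielded "llvm.pow.f64.f64", which has no mapping (this is what the deleted comment in replace-with-veclib-armpl.ll described). A minimal sketch of the difference, assuming a standalone driver; main, the FromOloadTys/FromArgTys names, and the asserts are hypothetical, only the Intrinsic::getName usage mirrors the patch:

  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/Type.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    Module M("demo", Ctx);
    Type *F64 = Type::getDoubleTy(Ctx);
    // One overloaded type, as OloadTys now collects: matches the veclib key.
    std::string FromOloadTys = Intrinsic::getName(Intrinsic::pow, {F64}, &M);
    // All scalarized argument types, as the old code passed: no mapping found.
    std::string FromArgTys = Intrinsic::getName(Intrinsic::pow, {F64, F64}, &M);
    assert(FromOloadTys == "llvm.pow.f64");
    assert(FromArgTys == "llvm.pow.f64.f64");
    return 0;
  }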